author     John Koleszar <jkoleszar@google.com>  2010-11-05 12:30:33 -0400
committer  John Koleszar <jkoleszar@google.com>  2010-11-05 12:30:33 -0400
commit     7a590c902b9a77d9792d3a2497d28302eb0e0834 (patch)
tree       b1f735eee5d5a6fbc633b11eecf90dc47f8d7e42 /vp8
parent     f4020e2338a1786b1db0f67075ceb7d9c01be6a3 (diff)
parent     5551ef0ef4fd3271330fa5a2fbdfe70d4d2a1d2e (diff)
Merge remote branch 'origin/master' into experimental
Conflicts:
	configure
	ivfenc.c
	vp8/common/alloccommon.c
	vp8/common/onyxc_int.h
	vp8/vp8_cx_iface.c
Diffstat (limited to 'vp8')
-rw-r--r--  vp8/common/alloccommon.c | 124
-rw-r--r--  vp8/common/alloccommon.h | 11
-rw-r--r--  vp8/common/arm/arm_systemdependent.c | 136
-rw-r--r--  vp8/common/arm/armv6/bilinearfilter_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/copymem16x16_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/copymem8x4_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/copymem8x8_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/dc_only_idct_add_v6.asm | 67
-rw-r--r--  vp8/common/arm/armv6/filter_v6.asm | 70
-rw-r--r--  vp8/common/arm/armv6/idct_v6.asm | 43
-rw-r--r--  vp8/common/arm/armv6/iwalsh_v6.asm | 27
-rw-r--r--  vp8/common/arm/armv6/loopfilter_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/recon_v6.asm | 11
-rw-r--r--  vp8/common/arm/armv6/simpleloopfilter_v6.asm | 188
-rw-r--r--  vp8/common/arm/armv6/sixtappredict8x4_v6.asm | 48
-rw-r--r--  vp8/common/arm/bilinearfilter_arm.c | 33
-rw-r--r--  vp8/common/arm/filter_arm.c | 132
-rw-r--r--  vp8/common/arm/idct_arm.h | 35
-rw-r--r--  vp8/common/arm/loopfilter_arm.c | 41
-rw-r--r--  vp8/common/arm/loopfilter_arm.h | 15
-rw-r--r--  vp8/common/arm/neon/bilinearpredict16x16_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/bilinearpredict4x4_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/bilinearpredict8x4_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/bilinearpredict8x8_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/buildintrapredictorsmby_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/copymem16x16_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/copymem8x4_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/copymem8x8_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/dc_only_idct_add_neon.asm | 49
-rw-r--r--  vp8/common/arm/neon/iwalsh_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/loopfilter_neon.asm | 409
-rw-r--r--  vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm | 205
-rw-r--r--  vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm | 188
-rw-r--r--  vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm | 231
-rw-r--r--  vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm | 235
-rw-r--r--  vp8/common/arm/neon/mbloopfilter_neon.asm | 519
-rw-r--r--  vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm | 257
-rw-r--r--  vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm | 236
-rw-r--r--  vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm | 296
-rw-r--r--  vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm | 303
-rw-r--r--  vp8/common/arm/neon/recon16x16mb_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/recon2b_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/recon4b_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/recon_neon.c | 29
-rw-r--r--  vp8/common/arm/neon/reconb_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/save_neon_reg.asm | 11
-rw-r--r--  vp8/common/arm/neon/shortidct4x4llm_1_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/shortidct4x4llm_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/sixtappredict16x16_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/sixtappredict4x4_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/sixtappredict8x4_neon.asm | 11
-rw-r--r--  vp8/common/arm/neon/sixtappredict8x8_neon.asm | 11
-rw-r--r--  vp8/common/arm/recon_arm.c | 108
-rw-r--r--  vp8/common/arm/recon_arm.h | 20
-rw-r--r--  vp8/common/arm/reconintra4x4_arm.c | 408
-rw-r--r--  vp8/common/arm/reconintra_arm.c | 15
-rw-r--r--  vp8/common/arm/subpixel_arm.h | 15
-rw-r--r--  vp8/common/arm/systemdependent.c | 148
-rw-r--r--  vp8/common/arm/vpx_asm_offsets.c | 74
-rw-r--r--  vp8/common/bigend.h | 11
-rw-r--r--  vp8/common/blockd.c | 29
-rw-r--r--  vp8/common/blockd.h | 129
-rw-r--r--  vp8/common/boolcoder.h | 11
-rw-r--r--  vp8/common/codec_common_interface.h | 11
-rw-r--r--  vp8/common/coefupdateprobs.h | 11
-rw-r--r--  vp8/common/common.h | 11
-rw-r--r--  vp8/common/common_types.h | 11
-rw-r--r--  vp8/common/context.c | 11
-rw-r--r--  vp8/common/debugmodes.c | 21
-rw-r--r--  vp8/common/defaultcoefcounts.h | 83
-rw-r--r--  vp8/common/dma_desc.h | 11
-rw-r--r--  vp8/common/duck_io.h | 11
-rw-r--r--  vp8/common/entropy.c | 11
-rw-r--r--  vp8/common/entropy.h | 37
-rw-r--r--  vp8/common/entropymode.c | 17
-rw-r--r--  vp8/common/entropymode.h | 15
-rw-r--r--  vp8/common/entropymv.c | 27
-rw-r--r--  vp8/common/entropymv.h | 11
-rw-r--r--  vp8/common/extend.c | 43
-rw-r--r--  vp8/common/extend.h | 11
-rw-r--r--  vp8/common/filter_c.c | 87
-rw-r--r--  vp8/common/findnearmv.c | 15
-rw-r--r--  vp8/common/findnearmv.h | 11
-rw-r--r--  vp8/common/fourcc.hpp | 11
-rw-r--r--  vp8/common/g_common.h | 11
-rw-r--r--  vp8/common/generic/systemdependent.c | 39
-rw-r--r--  vp8/common/header.h | 11
-rw-r--r--  vp8/common/idct.h | 31
-rw-r--r--  vp8/common/idctllm.c | 44
-rw-r--r--  vp8/common/invtrans.c | 18
-rw-r--r--  vp8/common/invtrans.h | 11
-rw-r--r--  vp8/common/littlend.h | 11
-rw-r--r--  vp8/common/loopfilter.c | 143
-rw-r--r--  vp8/common/loopfilter.h | 28
-rw-r--r--  vp8/common/loopfilter_filters.c | 105
-rw-r--r--  vp8/common/mac_specs.h | 11
-rw-r--r--  vp8/common/mbpitch.c | 19
-rw-r--r--  vp8/common/modecont.c | 23
-rw-r--r--  vp8/common/modecont.h | 11
-rw-r--r--  vp8/common/modecontext.c | 231
-rw-r--r--  vp8/common/mv.h | 11
-rw-r--r--  vp8/common/onyx.h | 11
-rw-r--r--  vp8/common/onyxc_int.h | 69
-rw-r--r--  vp8/common/onyxd.h | 11
-rw-r--r--  vp8/common/partialgfupdate.h | 11
-rw-r--r--  vp8/common/postproc.c | 538
-rw-r--r--  vp8/common/postproc.h | 51
-rw-r--r--  vp8/common/ppc/copy_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/filter_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/filter_bilinear_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/idctllm_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/loopfilter_altivec.c | 11
-rw-r--r--  vp8/common/ppc/loopfilter_filters_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/platform_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/recon_altivec.asm | 11
-rw-r--r--  vp8/common/ppc/systemdependent.c | 11
-rw-r--r--  vp8/common/ppflags.h | 30
-rw-r--r--  vp8/common/pragmas.h | 11
-rw-r--r--  vp8/common/predictdc.c | 11
-rw-r--r--  vp8/common/predictdc.h | 11
-rw-r--r--  vp8/common/preproc.h | 11
-rw-r--r--  vp8/common/preprocif.h | 11
-rw-r--r--  vp8/common/proposed.h | 11
-rw-r--r--  vp8/common/quant_common.c | 11
-rw-r--r--  vp8/common/quant_common.h | 11
-rw-r--r--  vp8/common/recon.c | 58
-rw-r--r--  vp8/common/recon.h | 38
-rw-r--r--  vp8/common/reconinter.c | 105
-rw-r--r--  vp8/common/reconinter.h | 11
-rw-r--r--  vp8/common/reconintra.c | 65
-rw-r--r--  vp8/common/reconintra.h | 11
-rw-r--r--  vp8/common/reconintra4x4.c | 87
-rw-r--r--  vp8/common/reconintra4x4.h | 11
-rw-r--r--  vp8/common/segmentation_common.h | 15
-rw-r--r--  vp8/common/setupintrarecon.c | 19
-rw-r--r--  vp8/common/setupintrarecon.h | 11
-rw-r--r--  vp8/common/subpixel.h | 11
-rw-r--r--  vp8/common/swapyv12buffer.c | 11
-rw-r--r--  vp8/common/swapyv12buffer.h | 11
-rw-r--r--  vp8/common/systemdependent.h | 11
-rw-r--r--  vp8/common/textblit.c | 90
-rw-r--r--  vp8/common/threading.h | 18
-rw-r--r--  vp8/common/treecoder.c | 17
-rw-r--r--  vp8/common/treecoder.h | 15
-rw-r--r--  vp8/common/type_aliases.h | 25
-rw-r--r--  vp8/common/vfwsetting.hpp | 11
-rw-r--r--  vp8/common/vpx_ref_build_prefix.h | 11
-rw-r--r--  vp8/common/vpxblit.h | 11
-rw-r--r--  vp8/common/vpxblit_c64.h | 11
-rw-r--r--  vp8/common/vpxerrors.h | 11
-rw-r--r--  vp8/common/x86/boolcoder.cxx | 11
-rw-r--r--  vp8/common/x86/idct_x86.h | 17
-rw-r--r--  vp8/common/x86/idctllm_mmx.asm | 91
-rw-r--r--  vp8/common/x86/idctllm_sse2.asm | 708
-rw-r--r--  vp8/common/x86/iwalsh_mmx.asm | 13
-rw-r--r--  vp8/common/x86/iwalsh_sse2.asm | 13
-rw-r--r--  vp8/common/x86/loopfilter_mmx.asm | 219
-rw-r--r--  vp8/common/x86/loopfilter_sse2.asm | 2238
-rw-r--r--  vp8/common/x86/loopfilter_x86.c | 52
-rw-r--r--  vp8/common/x86/loopfilter_x86.h | 11
-rw-r--r--  vp8/common/x86/postproc_mmx.asm | 21
-rw-r--r--  vp8/common/x86/postproc_mmx.c | 11
-rw-r--r--  vp8/common/x86/postproc_sse2.asm | 27
-rw-r--r--  vp8/common/x86/postproc_x86.h | 11
-rw-r--r--  vp8/common/x86/recon_mmx.asm | 11
-rw-r--r--  vp8/common/x86/recon_sse2.asm | 13
-rw-r--r--  vp8/common/x86/recon_x86.h | 11
-rw-r--r--  vp8/common/x86/subpixel_mmx.asm | 57
-rw-r--r--  vp8/common/x86/subpixel_sse2.asm | 546
-rw-r--r--  vp8/common/x86/subpixel_ssse3.asm | 1554
-rw-r--r--  vp8/common/x86/subpixel_x86.h | 44
-rw-r--r--  vp8/common/x86/vp8_asm_stubs.c | 317
-rw-r--r--  vp8/common/x86/x86_systemdependent.c | 30
-rw-r--r--  vp8/decoder/arm/arm_dsystemdependent.c | 66
-rw-r--r--  vp8/decoder/arm/armv5/dequantize_v5.asm | 11
-rw-r--r--  vp8/decoder/arm/armv6/dboolhuff_v6.asm | 11
-rw-r--r--  vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm | 218
-rw-r--r--  vp8/decoder/arm/armv6/dequant_idct_v6.asm | 196
-rw-r--r--  vp8/decoder/arm/armv6/dequantdcidct_v6.asm | 202
-rw-r--r--  vp8/decoder/arm/armv6/dequantidct_v6.asm | 183
-rw-r--r--  vp8/decoder/arm/armv6/dequantize_v6.asm | 11
-rw-r--r--  vp8/decoder/arm/armv6/idct_blk_v6.c | 151
-rw-r--r--  vp8/decoder/arm/dboolhuff_arm.h | 16
-rw-r--r--  vp8/decoder/arm/dequantize_arm.c | 15
-rw-r--r--  vp8/decoder/arm/dequantize_arm.h | 63
-rw-r--r--  vp8/decoder/arm/detokenize.asm | 320
-rw-r--r--  vp8/decoder/arm/detokenize_arm.h | 22
-rw-r--r--  vp8/decoder/arm/detokenizearm_sjl.c | 730
-rw-r--r--  vp8/decoder/arm/detokenizearm_v6.asm | 364
-rw-r--r--  vp8/decoder/arm/dsystemdependent.c | 44
-rw-r--r--  vp8/decoder/arm/neon/dboolhuff_neon.asm | 11
-rw-r--r--  vp8/decoder/arm/neon/dequant_idct_neon.asm (renamed from vp8/decoder/arm/neon/dequantidct_neon.asm) | 78
-rw-r--r--  vp8/decoder/arm/neon/dequantdcidct_neon.asm | 133
-rw-r--r--  vp8/decoder/arm/neon/dequantizeb_neon.asm | 11
-rw-r--r--  vp8/decoder/arm/neon/idct_blk_neon.c | 115
-rw-r--r--  vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm | 79
-rw-r--r--  vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm | 69
-rw-r--r--  vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm | 206
-rw-r--r--  vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm | 198
-rw-r--r--  vp8/decoder/dboolhuff.c | 120
-rw-r--r--  vp8/decoder/dboolhuff.h | 155
-rw-r--r--  vp8/decoder/decodemv.c | 666
-rw-r--r--  vp8/decoder/decodemv.h | 11
-rw-r--r--  vp8/decoder/decoderthreading.h | 22
-rw-r--r--  vp8/decoder/decodframe.c | 489
-rw-r--r--  vp8/decoder/demode.c | 149
-rw-r--r--  vp8/decoder/demode.h | 32
-rw-r--r--  vp8/decoder/dequantize.c | 74
-rw-r--r--  vp8/decoder/dequantize.h | 88
-rw-r--r--  vp8/decoder/detokenize.c | 220
-rw-r--r--  vp8/decoder/detokenize.h | 21
-rw-r--r--  vp8/decoder/generic/dsystemdependent.c | 36
-rw-r--r--  vp8/decoder/idct_blk.c | 124
-rw-r--r--  vp8/decoder/onyxd_if.c | 317
-rw-r--r--  vp8/decoder/onyxd_if_sjl.c | 398
-rw-r--r--  vp8/decoder/onyxd_int.h | 65
-rw-r--r--  vp8/decoder/reconintra_mt.c | 982
-rw-r--r--  vp8/decoder/reconintra_mt.h | 26
-rw-r--r--  vp8/decoder/threading.c | 1059
-rw-r--r--  vp8/decoder/treereader.h | 11
-rw-r--r--  vp8/decoder/x86/dequantize_mmx.asm | 155
-rw-r--r--  vp8/decoder/x86/dequantize_x86.h | 54
-rw-r--r--  vp8/decoder/x86/idct_blk_mmx.c | 151
-rw-r--r--  vp8/decoder/x86/idct_blk_sse2.c | 114
-rw-r--r--  vp8/decoder/x86/onyxdxv.c | 11
-rw-r--r--  vp8/decoder/x86/x86_dsystemdependent.c | 31
-rw-r--r--  vp8/decoder/xprintf.c | 163
-rw-r--r--  vp8/decoder/xprintf.h | 32
-rw-r--r--  vp8/encoder/arm/arm_csystemdependent.c | 139
-rw-r--r--  vp8/encoder/arm/armv5te/boolhuff_armv5te.asm (renamed from vp8/encoder/arm/neon/boolhuff_armv7.asm) | 22
-rw-r--r--  vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm) | 31
-rw-r--r--  vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm) | 31
-rw-r--r--  vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm (renamed from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm) | 31
-rw-r--r--  vp8/encoder/arm/armv6/walsh_v6.asm | 11
-rw-r--r--  vp8/encoder/arm/boolhuff_arm.c | 11
-rw-r--r--  vp8/encoder/arm/csystemdependent.c | 159
-rw-r--r--  vp8/encoder/arm/dct_arm.h | 15
-rw-r--r--  vp8/encoder/arm/encodemb_arm.c | 11
-rw-r--r--  vp8/encoder/arm/encodemb_arm.h | 13
-rw-r--r--  vp8/encoder/arm/mcomp_arm.c | 1662
-rw-r--r--  vp8/encoder/arm/neon/fastfdct4x4_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/fastfdct8x4_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/fastquantizeb_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/sad16_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/sad8_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/shortfdct_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/subtract_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/variance_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/vp8_memcpy_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/vp8_mse16x16_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm | 29
-rw-r--r--  vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm | 11
-rw-r--r--  vp8/encoder/arm/picklpf_arm.c | 11
-rw-r--r--  vp8/encoder/arm/quantize_arm.c | 13
-rw-r--r--  vp8/encoder/arm/quantize_arm.h | 18
-rw-r--r--  vp8/encoder/arm/variance_arm.h | 25
-rw-r--r--  vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c | 11
-rw-r--r--  vp8/encoder/bitstream.c | 39
-rw-r--r--  vp8/encoder/bitstream.h | 25
-rw-r--r--  vp8/encoder/block.h | 39
-rw-r--r--  vp8/encoder/boolhuff.c | 11
-rw-r--r--  vp8/encoder/boolhuff.h | 11
-rw-r--r--  vp8/encoder/dct.c | 209
-rw-r--r--  vp8/encoder/dct.h | 18
-rw-r--r--  vp8/encoder/encodeframe.c | 447
-rw-r--r--  vp8/encoder/encodeintra.c | 47
-rw-r--r--  vp8/encoder/encodeintra.h | 11
-rw-r--r--  vp8/encoder/encodemb.c | 1080
-rw-r--r--  vp8/encoder/encodemb.h | 13
-rw-r--r--  vp8/encoder/encodemv.c | 13
-rw-r--r--  vp8/encoder/encodemv.h | 11
-rw-r--r--  vp8/encoder/ethreading.c | 84
-rw-r--r--  vp8/encoder/firstpass.c | 641
-rw-r--r--  vp8/encoder/firstpass.h | 12
-rw-r--r--  vp8/encoder/generic/csystemdependent.c | 29
-rw-r--r--  vp8/encoder/mcomp.c | 426
-rw-r--r--  vp8/encoder/mcomp.h | 19
-rw-r--r--  vp8/encoder/modecosts.c | 11
-rw-r--r--  vp8/encoder/modecosts.h | 11
-rw-r--r--  vp8/encoder/onyx_if.c | 1103
-rw-r--r--  vp8/encoder/onyx_int.h | 85
-rw-r--r--  vp8/encoder/parms.cpp | 11
-rw-r--r--  vp8/encoder/pickinter.c | 144
-rw-r--r--  vp8/encoder/pickinter.h | 11
-rw-r--r--  vp8/encoder/picklpf.c | 82
-rw-r--r--  vp8/encoder/ppc/csystemdependent.c | 11
-rw-r--r--  vp8/encoder/ppc/encodemb_altivec.asm | 11
-rw-r--r--  vp8/encoder/ppc/fdct_altivec.asm | 11
-rw-r--r--  vp8/encoder/ppc/rdopt_altivec.asm | 11
-rw-r--r--  vp8/encoder/ppc/sad_altivec.asm | 11
-rw-r--r--  vp8/encoder/ppc/variance_altivec.asm | 11
-rw-r--r--  vp8/encoder/ppc/variance_subpixel_altivec.asm | 11
-rw-r--r--  vp8/encoder/preproc.c | 11
-rw-r--r--  vp8/encoder/psnr.c | 11
-rw-r--r--  vp8/encoder/psnr.h | 11
-rw-r--r--  vp8/encoder/quantize.c | 290
-rw-r--r--  vp8/encoder/quantize.h | 20
-rw-r--r--  vp8/encoder/ratectrl.c | 35
-rw-r--r--  vp8/encoder/ratectrl.h | 11
-rw-r--r--  vp8/encoder/rdopt.c | 440
-rw-r--r--  vp8/encoder/rdopt.h | 11
-rw-r--r--  vp8/encoder/sad_c.c | 155
-rw-r--r--  vp8/encoder/segmentation.c (renamed from vp8/common/segmentation_common.c) | 35
-rw-r--r--  vp8/encoder/segmentation.h | 16
-rw-r--r--  vp8/encoder/ssim.c | 11
-rw-r--r--  vp8/encoder/temporal_filter.c | 651
-rw-r--r--  vp8/encoder/temporal_filter.h | 19
-rw-r--r--  vp8/encoder/tokenize.c | 209
-rw-r--r--  vp8/encoder/tokenize.h | 23
-rw-r--r--  vp8/encoder/treewriter.c | 11
-rw-r--r--  vp8/encoder/treewriter.h | 11
-rw-r--r--  vp8/encoder/variance.h | 110
-rw-r--r--  vp8/encoder/variance_c.c | 118
-rw-r--r--  vp8/encoder/x86/csystemdependent.c | 289
-rw-r--r--  vp8/encoder/x86/dct_mmx.asm | 407
-rw-r--r--  vp8/encoder/x86/dct_sse2.asm | 401
-rw-r--r--  vp8/encoder/x86/dct_x86.h | 38
-rw-r--r--  vp8/encoder/x86/encodemb_x86.h | 24
-rw-r--r--  vp8/encoder/x86/encodeopt.asm | 53
-rw-r--r--  vp8/encoder/x86/fwalsh_sse2.asm | 231
-rw-r--r--  vp8/encoder/x86/mcomp_x86.h | 20
-rw-r--r--  vp8/encoder/x86/preproc_mmx.c | 11
-rw-r--r--  vp8/encoder/x86/quantize_mmx.asm | 168
-rw-r--r--  vp8/encoder/x86/quantize_sse2.asm | 388
-rwxr-xr-x  vp8/encoder/x86/quantize_ssse3.asm | 114
-rw-r--r--  vp8/encoder/x86/quantize_x86.h | 41
-rw-r--r--  vp8/encoder/x86/sad_mmx.asm | 39
-rw-r--r--  vp8/encoder/x86/sad_sse2.asm | 45
-rw-r--r--  vp8/encoder/x86/sad_sse3.asm | 179
-rw-r--r--  vp8/encoder/x86/sad_sse4.asm | 353
-rw-r--r--  vp8/encoder/x86/sad_ssse3.asm | 55
-rw-r--r--  vp8/encoder/x86/subtract_mmx.asm | 17
-rw-r--r--  vp8/encoder/x86/subtract_sse2.asm | 356
-rw-r--r--  vp8/encoder/x86/variance_impl_mmx.asm | 31
-rw-r--r--  vp8/encoder/x86/variance_impl_sse2.asm | 21
-rw-r--r--  vp8/encoder/x86/variance_mmx.c | 139
-rw-r--r--  vp8/encoder/x86/variance_sse2.c | 218
-rw-r--r--  vp8/encoder/x86/variance_x86.h | 64
-rw-r--r--  vp8/encoder/x86/x86_csystemdependent.c | 195
-rw-r--r--  vp8/exports_dec | 2
-rw-r--r--  vp8/exports_enc | 2
-rw-r--r--  vp8/vp8_common.mk | 40
-rw-r--r--  vp8/vp8_cx_iface.c | 115
-rw-r--r--  vp8/vp8_dx_iface.c | 65
-rw-r--r--  vp8/vp8cx.mk | 29
-rw-r--r--  vp8/vp8cx_arm.mk | 31
-rw-r--r--  vp8/vp8dx.mk | 23
-rw-r--r--  vp8/vp8dx_arm.mk | 48
352 files changed, 20247 insertions, 16686 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 12d83aa1b..5ab8e29ab 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,46 +24,39 @@ extern void vp8_init_scan_order_mask();
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
{
int i;
- vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1);
+ vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
for (i = 0; i < rows; i++)
{
vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
}
}
+
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
+ int i;
+
+ for (i = 0; i < NUM_YV12_BUFFERS; i++)
+ vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
+
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
- vp8_yv12_de_alloc_frame_buffer(&oci->new_frame);
- vp8_yv12_de_alloc_frame_buffer(&oci->last_frame);
- vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame);
- vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
- vpx_free(oci->above_context[Y1CONTEXT]);
- vpx_free(oci->above_context[UCONTEXT]);
- vpx_free(oci->above_context[VCONTEXT]);
- vpx_free(oci->above_context[Y2CONTEXT]);
+ vpx_free(oci->above_context);
vpx_free(oci->mip);
- oci->above_context[Y1CONTEXT] = 0;
- oci->above_context[UCONTEXT] = 0;
- oci->above_context[VCONTEXT] = 0;
- oci->above_context[Y2CONTEXT] = 0;
+ oci->above_context = 0;
oci->mip = 0;
- // Structure used to minitor GF useage
- if (oci->gf_active_flags != 0)
- vpx_free(oci->gf_active_flags);
-
- oci->gf_active_flags = 0;
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
+ int i;
+
vp8_de_alloc_frame_buffers(oci);
- // our internal buffers are always multiples of 16
+ /* our internal buffers are always multiples of 16 */
if ((width & 0xf) != 0)
width += 16 - (width & 0xf);
@@ -70,32 +64,28 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
height += 16 - (height & 0xf);
- if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
+ for (i = 0; i < NUM_YV12_BUFFERS; i++)
{
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
-
+ oci->fb_idx_ref_cnt[0] = 0;
- if (vp8_yv12_alloc_frame_buffer(&oci->new_frame, width, height, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
+ if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
+ {
+ vp8_de_alloc_frame_buffers(oci);
+ return ALLOC_FAILURE;
+ }
}
- if (vp8_yv12_alloc_frame_buffer(&oci->last_frame, width, height, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
+ oci->new_fb_idx = 0;
+ oci->lst_fb_idx = 1;
+ oci->gld_fb_idx = 2;
+ oci->alt_fb_idx = 3;
- if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0)
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
+ oci->fb_idx_ref_cnt[0] = 1;
+ oci->fb_idx_ref_cnt[1] = 1;
+ oci->fb_idx_ref_cnt[2] = 1;
+ oci->fb_idx_ref_cnt[3] = 1;
- if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0)
+ if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
@@ -122,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->mi = oci->mip + oci->mode_info_stride + 1;
- oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
-
- if (!oci->above_context[Y1CONTEXT])
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
+ oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
- oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
- if (!oci->above_context[UCONTEXT])
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
-
- oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
- if (!oci->above_context[VCONTEXT])
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
-
- oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1);
-
- if (!oci->above_context[Y2CONTEXT])
+ if (!oci->above_context)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
@@ -156,20 +122,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
- // Structures used to minitor GF usage
- if (oci->gf_active_flags != 0)
- vpx_free(oci->gf_active_flags);
-
- oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1);
-
- if (!oci->gf_active_flags)
- {
- vp8_de_alloc_frame_buffers(oci);
- return ALLOC_FAILURE;
- }
-
- oci->gf_active_count = oci->mb_rows * oci->mb_cols;
-
return 0;
}
void vp8_setup_version(VP8_COMMON *cm)
@@ -227,10 +179,10 @@ void vp8_create_common(VP8_COMMON *oci)
oci->clr_type = REG_YUV;
oci->clamp_type = RECON_CLAMP_REQUIRED;
- // Initialise reference frame sign bias structure to defaults
+ /* Initialise reference frame sign bias structure to defaults */
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
- // Default disable buffer to buffer copying
+ /* Default disable buffer to buffer copying */
oci->copy_buffer_to_gf = 0;
oci->copy_buffer_to_arf = 0;
}
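[Editor's note: the net effect of the alloccommon.c hunks above is that the four separately named reference frames (new/last/golden/alt-ref) become indices into a shared, reference-counted pool of NUM_YV12_BUFFERS buffers. A minimal sketch of that pattern, using the field names from the diff; ref_cnt_fb here is a simplified illustration, not necessarily the tree's exact helper:

#define NUM_YV12_BUFFERS 4

typedef struct
{
    int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; /* per-buffer reference count */
    int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
} fb_pool;

/* Retarget a reference (e.g. lst_fb_idx) at a different pool slot:
 * drop the old reference, take one on the new slot.  A buffer whose
 * count reaches zero is free to be reused as the next new_fb_idx,
 * so "copying" a frame into golden/alt-ref is just index bookkeeping. */
static void ref_cnt_fb(int *ref_cnt, int *idx, int new_idx)
{
    if (ref_cnt[*idx] > 0)
        ref_cnt[*idx]--;

    *idx = new_idx;
    ref_cnt[new_idx]++;
}
]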
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index 73c7383c7..ea93c2522 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
new file mode 100644
index 000000000..83921f807
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+ int flags = arm_cpu_caps();
+ int has_edsp = flags & HAS_EDSP;
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+ rtcd->flags = flags;
+
+ /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
+ rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
+ rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
+ rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
+ rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+ rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
+ rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
+ rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
+
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;
+
+ rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+ rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
+ rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+ rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+ rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+ rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
+
+ rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
+ rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
+ rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
+ rtcd->recon.recon = vp8_recon_b_armv6;
+ rtcd->recon.recon2 = vp8_recon2b_armv6;
+ rtcd->recon.recon4 = vp8_recon4b_armv6;
+ }
+#endif
+
+#if HAVE_ARMV7
+ if (has_neon)
+ {
+ rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
+ rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
+ rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
+ rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
+ rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+ rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
+ rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
+ rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
+
+ rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
+ rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
+ rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
+ rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
+
+ rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+ rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
+ rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+ rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
+ rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+ rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
+ rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+ rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;
+
+ rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
+ rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
+ rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
+ rtcd->recon.recon = vp8_recon_b_neon;
+ rtcd->recon.recon2 = vp8_recon2b_neon;
+ rtcd->recon.recon4 = vp8_recon4b_neon;
+ rtcd->recon.recon_mb = vp8_recon_mb_neon;
+
+ }
+#endif
+
+#endif
+
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_media)
+#endif
+ {
+ vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+ vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+ }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_neon)
+#endif
+ {
+ vp8_build_intra_predictors_mby_ptr =
+ vp8_build_intra_predictors_mby_neon;
+ vp8_build_intra_predictors_mby_s_ptr =
+ vp8_build_intra_predictors_mby_s_neon;
+ }
+#endif
+}
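[Editor's note: arm_systemdependent.c fills in the run-time CPU dispatch (RTCD) table once at init; hot paths then call through the table instead of testing CPU flags per call. An illustrative caller under that assumption (predict_mb and the local types are hypothetical; the real invocation macros live in headers such as subpixel.h):

/* Function-pointer dispatch, as populated by vp8_arch_arm_common_init(). */
typedef void (*sixtap_fn)(unsigned char *src, int src_pitch,
                          int xoffset, int yoffset,
                          unsigned char *dst, int dst_pitch);

struct subpix_table { sixtap_fn sixtap16x16; };

static void predict_mb(const struct subpix_table *subpix,
                       unsigned char *src, int src_pitch,
                       int xoffset, int yoffset,
                       unsigned char *dst, int dst_pitch)
{
    /* calls whichever implementation init selected for this CPU */
    subpix->sixtap16x16(src, src_pitch, xoffset, yoffset, dst, dst_pitch);
}
]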
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index 4428cf8ff..09d7338d9 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm
index 00e97397c..fca91a0db 100644
--- a/vp8/common/arm/armv6/copymem16x16_v6.asm
+++ b/vp8/common/arm/armv6/copymem16x16_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm
index 94473ca65..d8362ef05 100644
--- a/vp8/common/arm/armv6/copymem8x4_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x4_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm
index 7cfa53389..c6a60c610 100644
--- a/vp8/common/arm/armv6/copymem8x8_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x8_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
new file mode 100644
index 000000000..e0660e9fd
--- /dev/null
+++ b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
@@ -0,0 +1,67 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+ EXPORT |vp8_dc_only_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+
+;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
+; unsigned char *dst_ptr, int pitch, int stride)
+; r0 input_dc
+; r1 pred_ptr
+; r2 dest_ptr
+; r3 pitch
+; sp stride
+
+|vp8_dc_only_idct_add_v6| PROC
+ stmdb sp!, {r4 - r7, lr}
+
+ add r0, r0, #4 ; input_dc += 4
+ ldr r12, c0x0000FFFF
+ ldr r4, [r1], r3
+ ldr r6, [r1], r3
+ and r0, r12, r0, asr #3 ; input_dc >> 3 + mask
+ ldr lr, [sp, #20]
+ orr r0, r0, r0, lsl #16 ; a1 | a1
+
+ uxtab16 r5, r0, r4 ; a1+2 | a1+0
+ uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
+ uxtab16 r7, r0, r6
+ uxtab16 r6, r0, r6, ror #8
+ usat16 r5, #8, r5
+ usat16 r4, #8, r4
+ usat16 r7, #8, r7
+ usat16 r6, #8, r6
+ orr r5, r5, r4, lsl #8
+ orr r7, r7, r6, lsl #8
+ ldr r4, [r1], r3
+ ldr r6, [r1]
+ str r5, [r2], lr
+ str r7, [r2], lr
+
+ uxtab16 r5, r0, r4
+ uxtab16 r4, r0, r4, ror #8
+ uxtab16 r7, r0, r6
+ uxtab16 r6, r0, r6, ror #8
+ usat16 r5, #8, r5
+ usat16 r4, #8, r4
+ usat16 r7, #8, r7
+ usat16 r6, #8, r6
+ orr r5, r5, r4, lsl #8
+ orr r7, r7, r6, lsl #8
+ str r5, [r2], lr
+ str r7, [r2]
+
+ ldmia sp!, {r4 - r7, pc}
+
+ ENDP ; |vp8_dc_only_idct_add_v6|
+
+; Constant Pool
+c0x0000FFFF DCD 0x0000FFFF
+ END
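[Editor's note: a scalar C sketch of what the ARMv6 routine above computes, reconstructed from its register comments: the DC coefficient is rounded ((input_dc + 4) >> 3), added to each of the 16 predicted pixels, and the sums are clamped to [0, 255] (the usat16 instructions perform that clamp four pixels at a time):

void dc_only_idct_add_sketch(short input_dc, unsigned char *pred_ptr,
                             unsigned char *dst_ptr, int pitch, int stride)
{
    int a1 = (input_dc + 4) >> 3;   /* rounded DC term */
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            int v = pred_ptr[c] + a1;

            /* clamp to [0, 255], as usat16 does in the asm */
            dst_ptr[c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }

        pred_ptr += pitch;
        dst_ptr  += stride;
    }
}
]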
diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index a7863fc94..03b5bccd7 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -1,15 +1,17 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_filter_block2d_first_pass_armv6|
EXPORT |vp8_filter_block2d_second_pass_armv6|
+ EXPORT |vp8_filter4_block2d_second_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
@@ -191,6 +193,64 @@
ENDP
+;---------------------------------
+; r0 short *src_ptr,
+; r1 unsigned char *output_ptr,
+; r2 unsigned int output_pitch,
+; r3 unsigned int cnt,
+; stack const short *vp8_filter
+;---------------------------------
+|vp8_filter4_block2d_second_pass_armv6| PROC
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr r11, [sp, #36] ; vp8_filter address
+ mov r7, r3, lsl #16 ; height is top part of counter
+
+ ldr r4, [r11] ; load up packed filter coefficients
+ add lr, r1, r3 ; save final destination pointer
+ ldr r5, [r11, #4]
+ ldr r6, [r11, #8]
+
+ pkhbt r12, r5, r4 ; pack the filter differently
+ pkhbt r11, r6, r5
+ mov r4, #0x40 ; rounding factor (for smlad{x})
+
+|height_loop_2nd_4|
+ ldrd r8, [r0, #-4] ; load the data
+ orr r7, r7, r3, lsr #1 ; loop counter
+
+|width_loop_2nd_4|
+ ldr r10, [r0, #4]!
+ smladx r6, r9, r12, r4 ; apply filter
+ pkhbt r8, r9, r8
+ smlad r5, r8, r12, r4
+ pkhbt r8, r10, r9
+ smladx r6, r10, r11, r6
+ sub r7, r7, #1
+ smlad r5, r8, r11, r5
+
+ mov r8, r9 ; shift the data for the next loop
+ mov r9, r10
+
+ usat r6, #8, r6, asr #7 ; shift and clamp
+ usat r5, #8, r5, asr #7
+
+ strb r5, [r1], r2 ; the result is transposed back and stored
+ tst r7, #0xff
+ strb r6, [r1], r2
+
+ bne width_loop_2nd_4
+
+ subs r7, r7, #0x10000
+ add r0, r0, #16 ; update src for next loop
+ sub r1, lr, r7, lsr #16 ; update dst for next loop
+
+ bne height_loop_2nd_4
+
+ ldmia sp!, {r4 - r11, pc}
+
+ ENDP
+
;------------------------------------
; r0 unsigned char *src_ptr
; r1 unsigned char *output_ptr,
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index 25c5165ec..27215afcd 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -14,8 +15,6 @@
EXPORT |vp8_short_idct4x4llm_v6_scott|
EXPORT |vp8_short_idct4x4llm_v6_dual|
- EXPORT |vp8_dc_only_idct_armv6|
-
AREA |.text|, CODE, READONLY
;********************************************************************************
@@ -343,34 +342,4 @@ loop2_dual
ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
ENDP
-
-; sjl added 10/17/08
-;void dc_only_idct_armv6(short input_dc, short *output, int pitch)
-|vp8_dc_only_idct_armv6| PROC
- stmdb sp!, {r4 - r6, lr}
-
- add r0, r0, #0x4
- add r4, r1, r2 ; output + shortpitch
- mov r0, r0, ASR #0x3 ;aka a1
- add r5, r1, r2, LSL #1 ; output + shortpitch * 2
- pkhbt r0, r0, r0, lsl #16 ; a1 | a1
- add r6, r5, r2 ; output + shortpitch * 3
-
- str r0, [r1, #0]
- str r0, [r1, #4]
-
- str r0, [r4, #0]
- str r0, [r4, #4]
-
- str r0, [r5, #0]
- str r0, [r5, #4]
-
- str r0, [r6, #0]
- str r0, [r6, #4]
-
-
- ldmia sp!, {r4 - r6, pc}
-
- ENDP ; |vp8_dc_only_idct_armv6|
-
END
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index 87475681f..463bff0f5 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -1,14 +1,15 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_short_inv_walsh4x4_armv6|
- EXPORT |vp8_short_inv_walsh4x4_1_armv6|
+ EXPORT |vp8_short_inv_walsh4x4_v6|
+ EXPORT |vp8_short_inv_walsh4x4_1_v6|
ARM
REQUIRE8
@@ -16,8 +17,8 @@
AREA |.text|, CODE, READONLY ; name this block of code
-;short vp8_short_inv_walsh4x4_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_armv6| PROC
+;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_v6| PROC
stmdb sp!, {r4 - r11, lr}
@@ -122,11 +123,11 @@
str r5, [r1]
ldmia sp!, {r4 - r11, pc}
- ENDP ; |vp8_short_inv_walsh4x4_armv6|
+ ENDP ; |vp8_short_inv_walsh4x4_v6|
-;short vp8_short_inv_walsh4x4_1_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_1_armv6| PROC
+;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_1_v6| PROC
ldrsh r2, [r0] ; [0]
add r2, r2, #3 ; [0] + 3
@@ -144,7 +145,7 @@
str r2, [r1]
bx lr
- ENDP ; |vp8_short_inv_walsh4x4_1_armv6|
+ ENDP ; |vp8_short_inv_walsh4x4_1_v6|
; Constant Pool
c0x00030003 DCD 0x00030003
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index c2b02dc0a..b6417dee6 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
index 085ff80c9..99c7bcf2d 100644
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ b/vp8/common/arm/armv6/recon_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 15c6c7d16..013712036 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -54,113 +55,87 @@ pstep RN r1
;stack const char *thresh,
;stack int count
-;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
+; All 16 elements in flimit are equal. So, in the code, only one load is needed
+; for flimit. Same applies to limit. thresh is not used in simple looopfilter
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
- sub src, src, pstep, lsl #1 ; move src pointer down by 2 lines
-
- ldr r12, [r3], #4 ; limit
- ldr r3, [src], pstep ; p1
-
- ldr r9, [sp, #36] ; count for 8-in-parallel
- ldr r4, [src], pstep ; p0
-
- ldr r7, [r2], #4 ; flimit
- ldr r5, [src], pstep ; q0
+ ldr r12, [r3] ; limit
+ ldr r3, [src, -pstep, lsl #1] ; p1
+ ldr r4, [src, -pstep] ; p0
+ ldr r5, [src] ; q0
+ ldr r6, [src, pstep] ; q1
+ ldr r7, [r2] ; flimit
ldr r2, c0x80808080
-
- ldr r6, [src] ; q1
-
+ ldr r9, [sp, #40] ; count for 8-in-parallel
uadd8 r7, r7, r7 ; flimit * 2
- mov r9, r9, lsl #1 ; 4-in-parallel
+ mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
uadd8 r12, r7, r12 ; flimit * 2 + limit
+ mov lr, #0 ; need 0 in a couple places
|simple_hnext8|
- ; vp8_simple_filter_mask() function
+ ; vp8_simple_filter_mask()
uqsub8 r7, r3, r6 ; p1 - q1
uqsub8 r8, r6, r3 ; q1 - p1
uqsub8 r10, r4, r5 ; p0 - q0
uqsub8 r11, r5, r4 ; q0 - p0
orr r8, r8, r7 ; abs(p1 - q1)
- ldr lr, c0x7F7F7F7F ; 01111111 mask
orr r10, r10, r11 ; abs(p0 - q0)
- and r8, lr, r8, lsr #1 ; abs(p1 - q1) / 2
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
- mvn lr, #0 ; r10 == -1
+ uhadd8 r8, r8, lr ; abs(p1 - q2) >> 1
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
- ; STALL waiting on r10 :(
- uqsub8 r10, r10, r12 ; compare to flimit
- mov r8, #0
-
- usub8 r10, r8, r10 ; use usub8 instead of ssub8
- ; STALL (maybe?) when are flags set? :/
- sel r10, lr, r8 ; filter mask: lr
-
+ mvn r8, #0
+ usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
+ sel r10, r8, lr ; filter mask: F or 0
cmp r10, #0
- beq simple_hskip_filter ; skip filtering
+ beq simple_hskip_filter ; skip filtering if all masks are 0x00
- ;vp8_simple_filter() function
+ ;vp8_simple_filter()
eor r3, r3, r2 ; p1 offset to convert to a signed value
eor r6, r6, r2 ; q1 offset to convert to a signed value
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
- qsub8 r3, r3, r6 ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
- qsub8 r6, r5, r4 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
-
- qadd8 r3, r3, r6
- ldr r8, c0x03030303 ; r8 = 3
-
- qadd8 r3, r3, r6
+ qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
+ qsub8 r6, r5, r4 ; q0 - p0
+ qadd8 r3, r3, r6 ; += q0 - p0
ldr r7, c0x04040404
-
- qadd8 r3, r3, r6
- and r3, r3, lr ; vp8_filter &= mask;
-
- ;save bottom 3 bits so that we round one side +4 and the other +3
- qadd8 r8 , r3 , r8 ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
- qadd8 r3 , r3 , r7 ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
-
- mov r7, #0
- shadd8 r8 , r8 , r7 ; Filter2 >>= 3
- shadd8 r3 , r3 , r7 ; Filter1 >>= 3
- shadd8 r8 , r8 , r7
- shadd8 r3 , r3 , r7
- shadd8 r8 , r8 , r7 ; r8: Filter2
- shadd8 r3 , r3 , r7 ; r7: filter1
-
- ;calculate output
- sub src, src, pstep, lsl #1
-
- qadd8 r4, r4, r8 ; u = vp8_signed_char_clamp(p0 + Filter2)
- qsub8 r5 ,r5, r3 ; u = vp8_signed_char_clamp(q0 - Filter1)
- eor r4, r4, r2 ; *op0 = u^0x80
- str r4, [src], pstep ; store op0 result
+ qadd8 r3, r3, r6 ; += q0 - p0
+ ldr r8, c0x03030303
+ qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
+ ;STALL
+ and r3, r3, r10 ; vp8_filter &= mask
+
+ qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
+ qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
+
+ shadd8 r7 , r7 , lr
+ shadd8 r8 , r8 , lr
+ shadd8 r7 , r7 , lr
+ shadd8 r8 , r8 , lr
+ shadd8 r7 , r7 , lr ; Filter1 >>= 3
+ shadd8 r8 , r8 , lr ; Filter2 >>= 3
+
+ qsub8 r5 ,r5, r7 ; u = q0 - Filter1
+ qadd8 r4, r4, r8 ; u = p0 + Filter2
eor r5, r5, r2 ; *oq0 = u^0x80
- str r5, [src], pstep ; store oq0 result
+ str r5, [src] ; store oq0 result
+ eor r4, r4, r2 ; *op0 = u^0x80
+ str r4, [src, -pstep] ; store op0 result
|simple_hskip_filter|
- add src, src, #4
- sub src, src, pstep
- sub src, src, pstep, lsl #1
-
subs r9, r9, #1
+ addne src, src, #4 ; next row
- ;pld [src]
- ;pld [src, pstep]
- ;pld [src, pstep, lsl #1]
-
- ldrne r3, [src], pstep ; p1
- ldrne r4, [src], pstep ; p0
- ldrne r5, [src], pstep ; q0
- ldrne r6, [src] ; q1
+ ldrne r3, [src, -pstep, lsl #1] ; p1
+ ldrne r4, [src, -pstep] ; p0
+ ldrne r5, [src] ; q0
+ ldrne r6, [src, pstep] ; q1
bne simple_hnext8
@@ -173,9 +148,9 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
- ldr r12, [r2], #4 ; r12: flimit
+ ldr r12, [r2] ; r12: flimit
ldr r2, c0x80808080
- ldr r7, [r3], #4 ; limit
+ ldr r7, [r3] ; limit
; load soure data to r7, r8, r9, r10
ldrh r3, [src, #-2]
@@ -212,16 +187,14 @@ pstep RN r1
uqsub8 r10, r5, r4 ; q0 - p0
orr r7, r7, r8 ; abs(p1 - q1)
orr r9, r9, r10 ; abs(p0 - q0)
- ldr lr, c0x7F7F7F7F ; 0111 1111 mask
- uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
- and r7, lr, r7, lsr #1 ; abs(p1 - q1) / 2
mov r8, #0
+ uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
+ uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
mvn r10, #0 ; r10 == -1
- uqsub8 r7, r7, r12 ; compare to flimit
- usub8 r7, r8, r7
- sel r7, r10, r8 ; filter mask: lr
+ usub8 r7, r12, r7 ; compare to flimit
+ sel lr, r10, r8 ; filter mask
cmp lr, #0
beq simple_vskip_filter ; skip filtering
@@ -232,35 +205,34 @@ pstep RN r1
eor r4, r4, r2 ; p0 offset to convert to a signed value
eor r5, r5, r2 ; q0 offset to convert to a signed value
- qsub8 r3, r3, r6 ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
- qsub8 r6, r5, r4 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
+ qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
+ qsub8 r6, r5, r4 ; q0 - p0
- qadd8 r3, r3, r6
- ldr r8, c0x03030303 ; r8 = 3
+ qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
+ ldr r9, c0x03030303 ; r9 = 3
- qadd8 r3, r3, r6
+ qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
ldr r7, c0x04040404
- qadd8 r3, r3, r6
+ qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
+ ;STALL
and r3, r3, lr ; vp8_filter &= mask
- ;save bottom 3 bits so that we round one side +4 and the other +3
- qadd8 r8 , r3 , r8 ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
- qadd8 r3 , r3 , r7 ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
+ qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
+ qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
- mov r7, #0
- shadd8 r8 , r8 , r7 ; Filter2 >>= 3
- shadd8 r3 , r3 , r7 ; Filter1 >>= 3
- shadd8 r8 , r8 , r7
- shadd8 r3 , r3 , r7
- shadd8 r8 , r8 , r7 ; r8: filter2
- shadd8 r3 , r3 , r7 ; r7: filter1
+ shadd8 r9 , r9 , r8
+ shadd8 r3 , r3 , r8
+ shadd8 r9 , r9 , r8
+ shadd8 r3 , r3 , r8
+ shadd8 r9 , r9 , r8 ; Filter2 >>= 3
+ shadd8 r3 , r3 , r8 ; Filter1 >>= 3
;calculate output
sub src, src, pstep, lsl #2
- qadd8 r4, r4, r8 ; u = vp8_signed_char_clamp(p0 + Filter2)
- qsub8 r5, r5, r3 ; u = vp8_signed_char_clamp(q0 - Filter1)
+ qadd8 r4, r4, r9 ; u = p0 + Filter2
+ qsub8 r5, r5, r3 ; u = q0 - Filter1
eor r4, r4, r2 ; *op0 = u^0x80
eor r5, r5, r2 ; *oq0 = u^0x80
@@ -285,10 +257,6 @@ pstep RN r1
|simple_vskip_filter|
subs r11, r11, #1
- ;pld [src]
- ;pld [src, pstep]
- ;pld [src, pstep, lsl #1]
-
; load soure data to r7, r8, r9, r10
ldrneh r3, [src, #-2]
ldrneh r4, [src], pstep
@@ -308,14 +276,12 @@ pstep RN r1
bne simple_vnext8
- ldmia sp!, {r4 - r12, pc}
+ ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
; Constant Pool
c0x80808080 DCD 0x80808080
c0x03030303 DCD 0x03030303
c0x04040404 DCD 0x04040404
-c0x01010101 DCD 0x01010101
-c0x7F7F7F7F DCD 0x7F7F7F7F
END
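[Editor's note: both simple-edge routines above implement the same per-pixel math, four pixels at a time, with ARMv6 SIMD-within-a-register instructions (uqsub8/uhadd8 for the mask, qadd8/shadd8 for the filter). A scalar sketch of that math, assuming signed-char clamping as in the C reference code; the function and helper names here are illustrative:

#include <stdlib.h>

static int clamp_s8(int v)
{
    return v < -128 ? -128 : (v > 127 ? 127 : v);
}

static void simple_filter_sketch(unsigned char *op1, unsigned char *op0,
                                 unsigned char *oq0, unsigned char *oq1,
                                 int flimit, int limit)
{
    /* pixels are biased by 128 (the eor with 0x80808080 in the asm)
       so the arithmetic below is signed */
    int p1 = *op1 - 128, p0 = *op0 - 128;
    int q0 = *oq0 - 128, q1 = *oq1 - 128;
    int mask, filter, Filter1, Filter2;

    /* vp8_simple_filter_mask(): filter only weak edges */
    mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit);

    filter = clamp_s8(clamp_s8(p1 - q1) + 3 * (q0 - p0));
    filter = mask ? filter : 0;

    /* +4 / +3 rounding so the two sides round toward each other */
    Filter1 = clamp_s8(filter + 4) >> 3;
    Filter2 = clamp_s8(filter + 3) >> 3;

    *oq0 = (unsigned char)(clamp_s8(q0 - Filter1) + 128);
    *op0 = (unsigned char)(clamp_s8(p0 + Filter2) + 128);
}
]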
diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 551d863e9..8b9939484 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -24,10 +25,10 @@
;and the result is stored in transpose.
|vp8_sixtap_predict8x4_armv6| PROC
stmdb sp!, {r4 - r11, lr}
- sub sp, sp, #184 ;reserve space on stack for temporary storage: 20x(8+1) +4
+ str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
cmp r2, #0 ;skip first_pass filter if xoffset=0
- str r3, [sp], #4 ;store yoffset
+ add lr, sp, #4 ;point to temporary buffer
beq skip_firstpass_filter
;first-pass filter
@@ -44,7 +45,6 @@
mov r2, #0x90000 ; height=9 is top part of counter
sub r1, r1, #8
- mov lr, #20
|first_pass_hloop_v6|
ldrb r6, [r0, #-5] ; load source data
@@ -82,10 +82,10 @@
tst r2, #0xff ; test loop counter
usat r11, #8, r11, asr #7
add r12, r12, #0x40
- strh r11, [sp], lr ; result is transposed and stored, which
+ strh r11, [lr], #20 ; result is transposed and stored, which
usat r12, #8, r12, asr #7
- strh r12, [sp], lr
+ strh r12, [lr], #20
movne r11, r6
movne r12, r7
@@ -106,8 +106,7 @@
subs r2, r2, #0x10000
- mov r6, #158
- sub sp, sp, r6
+ sub lr, lr, #158
add r0, r0, r1 ; move to next input line
@@ -115,10 +114,7 @@
;second pass filter
secondpass_filter
- mov r1, #18
- sub sp, sp, r1 ; 18+4
-
- ldr r3, [sp, #-4] ; load back yoffset
+ ldr r3, [sp], #4 ; load back yoffset
ldr r0, [sp, #216] ; load dst address from stack 180+36
ldr r1, [sp, #220] ; load dst stride from stack 180+40
@@ -191,30 +187,28 @@ skip_firstpass_filter
sub r0, r0, r1, lsl #1
sub r1, r1, #8
mov r2, #9
- mov r3, #20
skip_firstpass_hloop
ldrb r4, [r0], #1 ; load data
subs r2, r2, #1
ldrb r5, [r0], #1
- strh r4, [sp], r3 ; store it to immediate buffer
+ strh r4, [lr], #20 ; store it to immediate buffer
ldrb r6, [r0], #1 ; load data
- strh r5, [sp], r3
+ strh r5, [lr], #20
ldrb r7, [r0], #1
- strh r6, [sp], r3
+ strh r6, [lr], #20
ldrb r8, [r0], #1
- strh r7, [sp], r3
+ strh r7, [lr], #20
ldrb r9, [r0], #1
- strh r8, [sp], r3
+ strh r8, [lr], #20
ldrb r10, [r0], #1
- strh r9, [sp], r3
+ strh r9, [lr], #20
ldrb r11, [r0], #1
- strh r10, [sp], r3
+ strh r10, [lr], #20
add r0, r0, r1 ; move to next input line
- strh r11, [sp], r3
+ strh r11, [lr], #20
- mov r4, #158
- sub sp, sp, r4 ; move over to next column
+ sub lr, lr, #158 ; move over to next column
bne skip_firstpass_hloop
b secondpass_filter
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index bf972a3bc..65afb41a1 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -48,7 +49,7 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
const short *vp8_filter
);
-/*
+#if 0
void vp8_filter_block2d_bil_first_pass_6
(
unsigned char *src_ptr,
@@ -65,14 +66,14 @@ void vp8_filter_block2d_bil_first_pass_6
{
for ( j=0; j<output_width; j++ )
{
- // Apply bilinear filter
+ /* Apply bilinear filter */
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@@ -95,7 +96,7 @@ void vp8_filter_block2d_bil_second_pass_6
{
for ( j=0; j<output_width; j++ )
{
- // Apply filter
+ /* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[output_width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2);
@@ -103,12 +104,12 @@ void vp8_filter_block2d_bil_second_pass_6
src_ptr++;
}
- // Next row...
- //src_ptr += src_pixels_per_line - output_width;
+ /* Next row... */
+ /*src_ptr += src_pixels_per_line - output_width;*/
output_ptr += output_pitch;
}
}
-*/
+#endif
void vp8_filter_block2d_bil_armv6
(
@@ -123,13 +124,13 @@ void vp8_filter_block2d_bil_armv6
)
{
- unsigned short FData[36*16]; // Temp data bufffer used in filtering
+ unsigned short FData[36*16]; /* Temp data buffer used in filtering */
- // First filter 1-D horizontally...
- // pixel_step = 1;
+ /* First filter 1-D horizontally... */
+ /* pixel_step = 1; */
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
- // then 1-D vertically...
+ /* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
}
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 2a4640cae..b4f2fe6ca 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -19,13 +20,13 @@
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
{
- { 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+ { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
- { 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter
+ { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
- { 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter
+ { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
- { 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter
+ { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
@@ -49,6 +50,15 @@ extern void vp8_filter_block2d_second_pass_armv6
const short *vp8_filter
);
+extern void vp8_filter4_block2d_second_pass_armv6
+(
+ short *src_ptr,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int cnt,
+ const short *vp8_filter
+);
+
extern void vp8_filter_block2d_first_pass_only_armv6
(
unsigned char *src_ptr,
@@ -83,39 +93,43 @@ void vp8_sixtap_predict_armv6
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
- // Vfilter is null. First pass only
+ /* Vfilter is null. First pass only */
if (xoffset && !yoffset)
{
- //vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
- //vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );
+ /*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
+ vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
}
- // Hfilter is null. Second pass only
+ /* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
}
else
{
- // Vfilter is a 4 tap filter
+ /* Vfilter is a 4 tap filter */
if (yoffset & 0x1)
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
- // Vfilter is 6 tap filter
+ vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+ }
+ /* Vfilter is 6 tap filter */
else
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
-
- vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+ vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+ }
}
}
-/*
+#if 0
void vp8_sixtap_predict8x4_armv6
(
unsigned char *src_ptr,
@@ -128,33 +142,33 @@ void vp8_sixtap_predict8x4_armv6
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
-
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
-
-
-// if (xoffset && !yoffset)
-// {
-// vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
-// }
- // Hfilter is null. Second pass only
-// else if (!xoffset && yoffset)
-// {
-// vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
-// }
-// else
-// {
-// if (yoffset & 0x1)
- // vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
- // else
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
+
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+
+
+ /*if (xoffset && !yoffset)
+ {
+ vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
+ }*/
+ /* Hfilter is null. Second pass only */
+ /*else if (!xoffset && yoffset)
+ {
+ vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
+ }
+ else
+ {
+ if (yoffset & 0x1)
+ vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
+ else*/
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
-// }
+ /*}*/
}
-*/
+#endif
void vp8_sixtap_predict8x8_armv6
(
@@ -168,16 +182,16 @@ void vp8_sixtap_predict8x8_armv6
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
}
- // Hfilter is null. Second pass only
+ /* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
@@ -185,11 +199,15 @@ void vp8_sixtap_predict8x8_armv6
else
{
if (yoffset & 0x1)
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
+ vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+ }
else
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
-
- vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+ vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+ }
}
}
@@ -206,16 +224,16 @@ void vp8_sixtap_predict16x16_armv6
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
}
- // Hfilter is null. Second pass only
+ /* Hfilter is null. Second pass only */
else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
@@ -223,11 +241,15 @@ void vp8_sixtap_predict16x16_armv6
else
{
if (yoffset & 0x1)
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
+ vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+ }
else
+ {
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
-
- vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+ vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+ }
}
}
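
The functional change in this file is the new vp8_filter4_block2d_second_pass_armv6 path: for odd y offsets the selected sub_pel_filters[] row has zero outer taps (e.g. { 0, -6, 123, 12, -1, 0 }), so the vertical pass is effectively 4-tap and the horizontal pass can produce two fewer rows. The hard-coded first-pass heights follow from the usual k-tap rule; a sketch, not code from the tree:

    /* Why the heights above are 7/9, 11/13 and 19/21: a vertical
     * k-tap filter producing H output rows reads H + k - 1 inputs. */
    static int first_pass_rows(int block_height, int yoffset)
    {
        int taps = (yoffset & 1) ? 4 : 6;  /* odd offsets: outer taps are 0 */
        return block_height + taps - 1;    /* 4 -> 7/9, 8 -> 11/13, 16 -> 19/21 */
    }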
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f9ed21e0d..8b8d17917 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,41 +15,44 @@
#if HAVE_ARMV6
extern prototype_idct(vp8_short_idct4x4llm_1_v6);
extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
-extern prototype_idct_scalar(vp8_dc_only_idct_armv6);
-extern prototype_second_order(vp8_short_inv_walsh4x4_1_armv6);
-extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
-#undef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_armv6
+#undef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
#undef vp8_idct_iwalsh1
-#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_armv6
+#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
#undef vp8_idct_iwalsh16
-#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_armv6
+#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
+#endif
#endif
#if HAVE_ARMV7
extern prototype_idct(vp8_short_idct4x4llm_1_neon);
extern prototype_idct(vp8_short_idct4x4llm_neon);
-extern prototype_idct_scalar(vp8_dc_only_idct_neon);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_neon
-#undef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_neon
+#undef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
#undef vp8_idct_iwalsh1
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
@@ -56,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
#endif
+#endif
#endif
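
The new #if !CONFIG_RUNTIME_CPU_DETECT guards make these #define overrides compile-time only: when runtime CPU detection is enabled, dispatch goes through a function table filled in at init, and hard-wiring the NEON/ARMv6 symbols here would bypass it. An illustrative sketch of the pattern (names simplified; not the actual RTCD declarations):

    /* Illustrative only -- simplified stand-in for the RTCD machinery. */
    typedef void (*idct1_scalar_add_fn)(short dc, unsigned char *pred,
                                        unsigned char *dst, int pitch,
                                        int stride);

    struct idct_dispatch
    {
        idct1_scalar_add_fn idct1_scalar_add; /* repointed per-CPU at init */
    };

    #if CONFIG_RUNTIME_CPU_DETECT
    #define IDCT_CALL(ctx, fn) ((ctx)->fn)   /* indirect, patchable */
    #else
    #define IDCT_CALL(ctx, fn) vp8_idct_##fn /* resolved by the #defines above */
    #endif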
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index fa7c62617..a81c50588 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,16 +14,6 @@
#include "loopfilter.h"
#include "onyxc_int.h"
-typedef void loop_filter_uvfunction
-(
- unsigned char *u, // source pointer
- int p, // pitch
- const signed char *flimit,
- const signed char *limit,
- const signed char *thresh,
- unsigned char *v
-);
-
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
@@ -44,8 +35,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
#if HAVE_ARMV6
-//ARMV6 loopfilter functions
-// Horizontal MB filtering
+/*ARMV6 loopfilter functions*/
+/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -69,7 +60,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Vertical MB Filtering
+/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -93,7 +84,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Horizontal B Filtering
+/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -121,7 +112,7 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
-// Vertical B Filtering
+/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -151,8 +142,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
#endif
#if HAVE_ARMV7
-// NEON loopfilter functions
-// Horizontal MB filtering
+/* NEON loopfilter functions */
+/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -173,7 +164,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Vertical MB Filtering
+/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -194,7 +185,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Horizontal B Filtering
+/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -219,7 +210,7 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
-// Vertical B Filtering
+/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 4bb49456d..cd62207d7 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -21,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
@@ -45,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -56,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
@@ -80,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
index a2fea2bd6..bb72bad1f 100644
--- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
index 74d2db5dc..6d4820b7e 100644
--- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
index 46ebb0e0b..b9f3ce034 100644
--- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
index 80728d4f8..f7a7d1496 100644
--- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index f42ac63c9..e3ea91fe6 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
index 89d5e1018..bda4b9654 100644
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ b/vp8/common/arm/neon/copymem16x16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
index 302f734ff..35c0f6708 100644
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ b/vp8/common/arm/neon/copymem8x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
index 50d39ef66..1f5b9411b 100644
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ b/vp8/common/arm/neon/copymem8x8_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
new file mode 100644
index 000000000..49ba05fb0
--- /dev/null
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
@@ -0,0 +1,49 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dc_only_idct_add_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
+; unsigned char *dst_ptr, int pitch, int stride)
+; r0 input_dc
+; r1 pred_ptr
+; r2 dst_ptr
+; r3 pitch
+; sp stride
+|vp8_dc_only_idct_add_neon| PROC
+ add r0, r0, #4
+ asr r0, r0, #3
+ ldr r12, [sp]
+ vdup.16 q0, r0
+
+ vld1.32 {d2[0]}, [r1], r3
+ vld1.32 {d2[1]}, [r1], r3
+ vld1.32 {d4[0]}, [r1], r3
+ vld1.32 {d4[1]}, [r1]
+
+ vaddw.u8 q1, q0, d2
+ vaddw.u8 q2, q0, d4
+
+ vqmovun.s16 d2, q1
+ vqmovun.s16 d4, q2
+
+ vst1.32 {d2[0]}, [r2], r12
+ vst1.32 {d2[1]}, [r2], r12
+ vst1.32 {d4[0]}, [r2], r12
+ vst1.32 {d4[1]}, [r2]
+
+ bx lr
+
+ ENDP
+ END
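
For reference, a scalar C model of the new routine (an equivalent sketch, not code from the tree): the DC coefficient is descaled with rounding, added to every pixel of the 4x4 predictor, and saturated to 8 bits, matching the vaddw.u8/vqmovun.s16 pair above.

    /* Scalar model of vp8_dc_only_idct_add_neon (sketch). */
    static unsigned char clamp_u8(int v)
    {
        return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    static void dc_only_idct_add_model(short input_dc, unsigned char *pred_ptr,
                                       unsigned char *dst_ptr, int pitch,
                                       int stride)
    {
        int dc = (input_dc + 4) >> 3;   /* add r0, r0, #4 ; asr r0, r0, #3 */
        int r, c;

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
                dst_ptr[c] = clamp_u8(pred_ptr[c] + dc); /* vaddw + vqmovun */

            pred_ptr += pitch;
            dst_ptr += stride;
        }
    }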
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index 4fc744c96..663bf390e 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_inv_walsh4x4_neon|
EXPORT |vp8_short_inv_walsh4x4_1_neon|
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
new file mode 100644
index 000000000..bf0c35721
--- /dev/null
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -0,0 +1,409 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_loop_filter_horizontal_edge_y_neon|
+ EXPORT |vp8_loop_filter_horizontal_edge_uv_neon|
+ EXPORT |vp8_loop_filter_vertical_edge_y_neon|
+ EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; int count)
+; r0 unsigned char *src
+; r1 int pitch
+; r2 const signed char *flimit
+; r3 const signed char *limit
+; sp const signed char *thresh,
+; sp+4 int count (unused)
+|vp8_loop_filter_horizontal_edge_y_neon| PROC
+ stmdb sp!, {lr}
+ vld1.s8 {d0[], d1[]}, [r2] ; flimit
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
+ ldr r12, [sp, #4] ; load thresh pointer
+
+ vld1.u8 {q3}, [r2], r1 ; p3
+ vld1.u8 {q4}, [r2], r1 ; p2
+ vld1.u8 {q5}, [r2], r1 ; p1
+ vld1.u8 {q6}, [r2], r1 ; p0
+ vld1.u8 {q7}, [r2], r1 ; q0
+ vld1.u8 {q8}, [r2], r1 ; q1
+ vld1.u8 {q9}, [r2], r1 ; q2
+ vld1.u8 {q10}, [r2] ; q3
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ sub r0, r0, r1, lsl #1
+
+ bl vp8_loop_filter_neon
+
+ vst1.u8 {q5}, [r0], r1 ; store op1
+ vst1.u8 {q6}, [r0], r1 ; store op0
+ vst1.u8 {q7}, [r0], r1 ; store oq0
+ vst1.u8 {q8}, [r0], r1 ; store oq1
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
+
+; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; unsigned char *v)
+; r0 unsigned char *u,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 unsigned char *v
+|vp8_loop_filter_horizontal_edge_uv_neon| PROC
+ stmdb sp!, {lr}
+ vld1.s8 {d0[], d1[]}, [r2] ; flimit
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ ldr r2, [sp, #8] ; load v ptr
+
+ sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
+ vld1.u8 {d6}, [r3], r1 ; p3
+ vld1.u8 {d8}, [r3], r1 ; p2
+ vld1.u8 {d10}, [r3], r1 ; p1
+ vld1.u8 {d12}, [r3], r1 ; p0
+ vld1.u8 {d14}, [r3], r1 ; q0
+ vld1.u8 {d16}, [r3], r1 ; q1
+ vld1.u8 {d18}, [r3], r1 ; q2
+ vld1.u8 {d20}, [r3] ; q3
+
+ ldr r3, [sp, #4] ; load thresh pointer
+
+ sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
+ vld1.u8 {d7}, [r12], r1 ; p3
+ vld1.u8 {d9}, [r12], r1 ; p2
+ vld1.u8 {d11}, [r12], r1 ; p1
+ vld1.u8 {d13}, [r12], r1 ; p0
+ vld1.u8 {d15}, [r12], r1 ; q0
+ vld1.u8 {d17}, [r12], r1 ; q1
+ vld1.u8 {d19}, [r12], r1 ; q2
+ vld1.u8 {d21}, [r12] ; q3
+
+ vld1.s8 {d4[], d5[]}, [r3] ; thresh
+
+ bl vp8_loop_filter_neon
+
+ sub r0, r0, r1, lsl #1
+ sub r2, r2, r1, lsl #1
+
+ vst1.u8 {d10}, [r0], r1 ; store u op1
+ vst1.u8 {d11}, [r2], r1 ; store v op1
+ vst1.u8 {d12}, [r0], r1 ; store u op0
+ vst1.u8 {d13}, [r2], r1 ; store v op0
+ vst1.u8 {d14}, [r0], r1 ; store u oq0
+ vst1.u8 {d15}, [r2], r1 ; store v oq0
+ vst1.u8 {d16}, [r0] ; store u oq1
+ vst1.u8 {d17}, [r2] ; store v oq1
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
+
+; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; int count)
+; r0 unsigned char *src,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 int count (unused)
+|vp8_loop_filter_vertical_edge_y_neon| PROC
+ stmdb sp!, {lr}
+ vld1.s8 {d0[], d1[]}, [r2] ; flimit
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ sub r2, r0, #4 ; src ptr down by 4 columns
+ sub r0, r0, #2 ; dst ptr
+ ldr r12, [sp, #4] ; load thresh pointer
+
+ vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
+ vld1.u8 {d8}, [r2], r1
+ vld1.u8 {d10}, [r2], r1
+ vld1.u8 {d12}, [r2], r1
+ vld1.u8 {d14}, [r2], r1
+ vld1.u8 {d16}, [r2], r1
+ vld1.u8 {d18}, [r2], r1
+ vld1.u8 {d20}, [r2], r1
+
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+
+ vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
+ vld1.u8 {d9}, [r2], r1
+ vld1.u8 {d11}, [r2], r1
+ vld1.u8 {d13}, [r2], r1
+ vld1.u8 {d15}, [r2], r1
+ vld1.u8 {d17}, [r2], r1
+ vld1.u8 {d19}, [r2], r1
+ vld1.u8 {d21}, [r2]
+
+ ;transpose to 8x16 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ bl vp8_loop_filter_neon
+
+ vswp d12, d11
+ vswp d16, d13
+ vswp d14, d12
+ vswp d16, d15
+
+ ;store op1, op0, oq0, oq1
+ vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+ vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+ vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+ vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+ vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+ vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+ vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+ vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+ vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
+ vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
+ vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
+ vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
+ vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
+ vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
+ vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
+ vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
+
+; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; unsigned char *v)
+; r0 unsigned char *u,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 unsigned char *v
+|vp8_loop_filter_vertical_edge_uv_neon| PROC
+ stmdb sp!, {lr}
+ sub r12, r0, #4 ; move u pointer down by 4 columns
+ vld1.s8 {d0[], d1[]}, [r2] ; flimit
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+
+ ldr r2, [sp, #8] ; load v ptr
+
+ vld1.u8 {d6}, [r12], r1 ;load u data
+ vld1.u8 {d8}, [r12], r1
+ vld1.u8 {d10}, [r12], r1
+ vld1.u8 {d12}, [r12], r1
+ vld1.u8 {d14}, [r12], r1
+ vld1.u8 {d16}, [r12], r1
+ vld1.u8 {d18}, [r12], r1
+ vld1.u8 {d20}, [r12]
+
+ sub r3, r2, #4 ; move v pointer down by 4 columns
+ vld1.u8 {d7}, [r3], r1 ;load v data
+ vld1.u8 {d9}, [r3], r1
+ vld1.u8 {d11}, [r3], r1
+ vld1.u8 {d13}, [r3], r1
+ vld1.u8 {d15}, [r3], r1
+ vld1.u8 {d17}, [r3], r1
+ vld1.u8 {d19}, [r3], r1
+ vld1.u8 {d21}, [r3]
+
+ ldr r12, [sp, #4] ; load thresh pointer
+
+ ;transpose to 8x16 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+
+ bl vp8_loop_filter_neon
+
+ sub r0, r0, #2
+ sub r2, r2, #2
+
+ vswp d12, d11
+ vswp d16, d13
+ vswp d14, d12
+ vswp d16, d15
+
+ ;store op1, op0, oq0, oq1
+ vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+ vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
+ vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+ vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
+ vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+ vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
+ vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+ vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
+ vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+ vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
+ vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+ vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
+ vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+ vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
+ vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
+ vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
+
+; void vp8_loop_filter_neon();
+; This is a helper function for the loopfilters. The individual functions do the
+; necessary load, transpose (if necessary) and store.
+
+; r0-r3 PRESERVE
+; q0 flimit
+; q1 limit
+; q2 thresh
+; q3 p3
+; q4 p2
+; q5 p1
+; q6 p0
+; q7 q0
+; q8 q1
+; q9 q2
+; q10 q3
+|vp8_loop_filter_neon| PROC
+ ldr r12, _lf_coeff_
+
+ ; vp8_filter_mask
+ vabd.u8 q11, q3, q4 ; abs(p3 - p2)
+ vabd.u8 q12, q4, q5 ; abs(p2 - p1)
+ vabd.u8 q13, q5, q6 ; abs(p1 - p0)
+ vabd.u8 q14, q8, q7 ; abs(q1 - q0)
+ vabd.u8 q3, q9, q8 ; abs(q2 - q1)
+ vabd.u8 q4, q10, q9 ; abs(q3 - q2)
+ vabd.u8 q9, q6, q7 ; abs(p0 - q0)
+
+ vmax.u8 q11, q11, q12
+ vmax.u8 q12, q13, q14
+ vmax.u8 q3, q3, q4
+ vmax.u8 q15, q11, q12
+
+ ; vp8_hevmask
+ vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
+ vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
+ vmax.u8 q15, q15, q3
+
+ vadd.u8 q0, q0, q0 ; flimit * 2
+ vadd.u8 q0, q0, q1 ; flimit * 2 + limit
+ vcge.u8 q15, q1, q15
+
+ vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
+ vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
+ vshr.u8 q2, q2, #1 ; a = a / 2
+ vqadd.u8 q9, q9, q2 ; a = b + a
+ vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
+
+ vld1.u8 {q0}, [r12]!
+
+ ; vp8_filter() function
+ ; convert to signed
+ veor q7, q7, q0 ; qs0
+ veor q6, q6, q0 ; ps0
+ veor q5, q5, q0 ; ps1
+ veor q8, q8, q0 ; qs1
+
+ vld1.u8 {q10}, [r12]!
+
+ vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
+ vsubl.s8 q11, d15, d13
+
+ vmovl.u8 q4, d20
+
+ vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
+ vorr q14, q13, q14 ; vp8_hevmask
+
+ vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
+ vmul.i16 q11, q11, q4
+
+ vand q1, q1, q14 ; vp8_filter &= hev
+ vand q15, q15, q9 ; vp8_filter_mask
+
+ vaddw.s8 q2, q2, d2
+ vaddw.s8 q11, q11, d3
+
+ vld1.u8 {q9}, [r12]!
+
+ ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+ vqmovn.s16 d2, q2
+ vqmovn.s16 d3, q11
+ vand q1, q1, q15 ; vp8_filter &= mask
+
+ vqadd.s8 q2, q1, q10 ; Filter2 = clamp(vp8_filter+3)
+ vqadd.s8 q1, q1, q9 ; Filter1 = clamp(vp8_filter+4)
+ vshr.s8 q2, q2, #3 ; Filter2 >>= 3
+ vshr.s8 q1, q1, #3 ; Filter1 >>= 3
+
+ vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
+ vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
+
+ ; outer tap adjustments: ++vp8_filter >> 1
+ vrshr.s8 q1, q1, #1
+ vbic q1, q1, q14 ; vp8_filter &= ~hev
+
+ vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
+ vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
+
+ veor q5, q13, q0 ; *op1 = u^0x80
+ veor q6, q11, q0 ; *op0 = u^0x80
+ veor q7, q10, q0 ; *oq0 = u^0x80
+ veor q8, q12, q0 ; *oq1 = u^0x80
+
+ bx lr
+ ENDP ; |vp8_loop_filter_neon|
+
+ AREA loopfilter_dat, DATA, READONLY
+_lf_coeff_
+ DCD lf_coeff
+lf_coeff
+ DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
+ DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
+ DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
+ DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
+
+ END
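
The four entry points above now share one filter body: they differ only in how the q registers are loaded and stored (the vertical variants bracket the call with vtrn transposes), and the per-file copies of the 0x80/3/4/1 constant tables collapse into the single _lf_coeff_ block. A one-pixel scalar sketch of the shared filter step, assuming (as vshr.s8 does) that right shifts of negative values are arithmetic:

    /* One-pixel model of vp8_loop_filter_neon's filter step (sketch).
     * hev and mask are 0/1 here; the NEON code uses all-ones masks. */
    static signed char clamp_s8(int v)
    {
        return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    static void loop_filter_model(signed char *ps1, signed char *ps0,
                                  signed char *qs0, signed char *qs1,
                                  int hev, int mask)
    {
        int f, f1, f2;

        f = clamp_s8(*ps1 - *qs1);              /* vqsub.s8 q1, q5, q8 */
        if (!hev) f = 0;                        /* vand q1, q1, q14    */
        f = clamp_s8(f + 3 * (*qs0 - *ps0));    /* vmul/vaddw/vqmovn   */
        if (!mask) f = 0;                       /* vand q1, q1, q15    */

        f1 = clamp_s8(f + 4) >> 3;              /* Filter1 */
        f2 = clamp_s8(f + 3) >> 3;              /* Filter2 */
        *ps0 = clamp_s8(*ps0 + f2);             /* op0 */
        *qs0 = clamp_s8(*qs0 - f1);             /* oq0 */

        f1 = (f1 + 1) >> 1;                     /* vrshr.s8 q1, q1, #1 */
        if (hev) f1 = 0;                        /* vbic q1, q1, q14    */
        *ps1 = clamp_s8(*ps1 + f1);             /* op1 */
        *qs1 = clamp_s8(*qs1 - f1);             /* oq1 */
    }

Pixels enter this step offset by 0x80 (the veor against _lf_coeff_'s first row), which is how the unsigned samples are treated as signed values.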
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
deleted file mode 100644
index e3e8e8a72..000000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,205 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_loop_filter_horizontal_edge_uv_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0 unsigned char *u,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_horizontal_edge_uv_neon| PROC
- sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
-
- ldr r2, [sp, #4] ; load v ptr
- ldr r12, [sp, #0] ; load thresh pointer
-
- sub r2, r2, r1, lsl #2 ; move v pointer down by 4 lines
-
- vld1.u8 {d6}, [r0], r1 ; p3
- vld1.u8 {d7}, [r2], r1 ; p3
- vld1.u8 {d8}, [r0], r1 ; p2
- vld1.u8 {d9}, [r2], r1 ; p2
- vld1.u8 {d10}, [r0], r1 ; p1
- vld1.u8 {d11}, [r2], r1 ; p1
- vld1.u8 {d12}, [r0], r1 ; p0
- vld1.u8 {d13}, [r2], r1 ; p0
- vld1.u8 {d14}, [r0], r1 ; q0
- vld1.u8 {d15}, [r2], r1 ; q0
- vld1.u8 {d16}, [r0], r1 ; q1
- vld1.u8 {d17}, [r2], r1 ; q1
- vld1.u8 {d18}, [r0], r1 ; q2
- vld1.u8 {d19}, [r2], r1 ; q2
- vld1.u8 {d20}, [r0], r1 ; q3
- vld1.u8 {d21}, [r2], r1 ; q3
-
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
-
- ldr r12, _lfhuv_coeff_
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q4, q10, q9 ; abs(q3 - q2)
- vabd.u8 q9, q6, q7 ; abs(p0 - q0)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1
- vadd.u8 q0, q0, q0 ; flimit * 2
- vadd.u8 q0, q0, q1 ; flimit * 2 + limit
-
- vand q15, q15, q12
- vand q10, q10, q11
- vand q3, q3, q4
-
- vabd.u8 q2, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2
- vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
- vld1.u8 {q0}, [r12]!
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
- vld1.u8 {q10}, [r12]!
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q11, d15, d13
-
- vand q3, q3, q9
- vmovl.u8 q4, d20
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
- vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
- vmul.i16 q11, q11, q4
-
- vand q1, q1, q14 ; vp8_filter &= hev
- vand q15, q15, q3 ; q15: vp8_filter_mask
- ;;
- ;vld1.u8 {q4}, [r12]! ;no need 7 any more
-
- ;vqadd.s8 q1, q1, q2
- vaddw.s8 q2, q2, d2
- vaddw.s8 q11, q11, d3
-
- vld1.u8 {q9}, [r12]!
- ;
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q11
- ;;
-
- vand q1, q1, q15 ; vp8_filter &= mask
- ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q1, q4 ; s = vp8_filter & 7
-; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
- ;;;;
-; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3
-; vceq.i8 q2, q2, q9 ; s = (s==4)*-1
- ;;
-; ;calculate output
-; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
- vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
- vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q1, q1, #3 ; Filter1 >>= 3
-
- ;calculate output
- vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
- vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1
-
- sub r0, r0, r1, lsl #2
- sub r0, r0, r1, lsl #1
- ;
-
- vbic q1, q1, q14 ; vp8_filter &= ~hev
-
- sub r2, r2, r1, lsl #2
- sub r2, r2, r1, lsl #1
- ;;
-
- vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
- ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u)
- vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
- ;
-
- veor q5, q13, q0 ; *op1 = u^0x80
- veor q6, q11, q0 ; *op0 = u^0x80
- veor q7, q10, q0 ; *oq0 = u^0x80
- veor q8, q12, q0 ; *oq1 = u^0x80
- ;
-
- vst1.u8 {d10}, [r0], r1 ; store u op1
- vst1.u8 {d11}, [r2], r1 ; store v op1
- vst1.u8 {d12}, [r0], r1 ; store u op0
- vst1.u8 {d13}, [r2], r1 ; store v op0
- vst1.u8 {d14}, [r0], r1 ; store u oq0
- vst1.u8 {d15}, [r2], r1 ; store v oq0
- vst1.u8 {d16}, [r0], r1 ; store u oq1
- vst1.u8 {d17}, [r2], r1 ; store v oq1
-
- bx lr
- ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
-
-;-----------------
- AREA hloopfilteruv_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhuv_coeff_
- DCD lfhuv_coeff
-lfhuv_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
-
- END
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
deleted file mode 100644
index f11055d42..000000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,188 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_loop_filter_horizontal_edge_y_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5) int count --unused
-
-|vp8_loop_filter_horizontal_edge_y_neon| PROC
- sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r12, [sp, #0] ; load thresh pointer
-
- vld1.u8 {q3}, [r0], r1 ; p3
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.u8 {q4}, [r0], r1 ; p2
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.u8 {q5}, [r0], r1 ; p1
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vld1.u8 {q6}, [r0], r1 ; p0
- ldr r12, _lfhy_coeff_
- vld1.u8 {q7}, [r0], r1 ; q0
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vld1.u8 {q8}, [r0], r1 ; q1
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vld1.u8 {q9}, [r0], r1 ; q2
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vld1.u8 {q10}, [r0], r1 ; q3
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q4, q10, q9 ; abs(q3 - q2)
- vabd.u8 q9, q6, q7 ; abs(p0 - q0)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1
- vadd.u8 q0, q0, q0 ; flimit * 2
- vadd.u8 q0, q0, q1 ; flimit * 2 + limit
-
- vand q15, q15, q12
- vand q10, q10, q11
- vand q3, q3, q4
-
- vabd.u8 q2, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2
- vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
- vld1.u8 {q0}, [r12]!
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
- vld1.u8 {q10}, [r12]!
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q11, d15, d13
-
- vand q3, q3, q9
- vmovl.u8 q4, d20
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
- vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
- vmul.i16 q11, q11, q4
-
- vand q1, q1, q14 ; vp8_filter &= hev
- vand q15, q15, q3 ; q15: vp8_filter_mask
- ;;
- ;vld1.u8 {q4}, [r12]! ;no need 7 any more
-
- ;vqadd.s8 q1, q1, q2
- vaddw.s8 q2, q2, d2
- vaddw.s8 q11, q11, d3
-
- vld1.u8 {q9}, [r12]!
- ;
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q11
- ;;
-
- vand q1, q1, q15 ; vp8_filter &= mask
- ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q1, q4 ; s = vp8_filter & 7
-; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
- ;;;;
-; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3
-; vceq.i8 q2, q2, q9 ; s = (s==4)*-1
- ;;
-; ;calculate output
-; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
- vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
- vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q1, q1, #3 ; Filter1 >>= 3
-
- ;calculate output
- vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
- vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1
-
- sub r0, r0, r1, lsl #2
- sub r0, r0, r1, lsl #1
- ;
-
- vbic q1, q1, q14 ; vp8_filter &= ~hev
- ;
- add r2, r1, r0
-
- vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
- ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u)
- vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
- add r3, r2, r1
-
- veor q5, q13, q0 ; *op1 = u^0x80
- veor q6, q11, q0 ; *op0 = u^0x80
- veor q7, q10, q0 ; *oq0 = u^0x80
- veor q8, q12, q0 ; *oq1 = u^0x80
-
- add r12, r3, r1
-
- vst1.u8 {q5}, [r0] ; store op1
- vst1.u8 {q6}, [r2] ; store op0
- vst1.u8 {q7}, [r3] ; store oq0
- vst1.u8 {q8}, [r12] ; store oq1
-
- bx lr
- ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-;-----------------
- AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhy_coeff_
- DCD lfhy_coeff
-lfhy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
-
- END
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 6d74fab52..0b84dc750 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index 2bb6222b9..a793d095a 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
deleted file mode 100644
index d79cc68a3..000000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,231 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0 unsigned char *u,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_vertical_edge_uv_neon| PROC
- sub r0, r0, #4 ; move u pointer down by 4 columns
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
-
- ldr r2, [sp, #4] ; load v ptr
- ldr r12, [sp, #0] ; load thresh pointer
-
- sub r2, r2, #4 ; move v pointer down by 4 columns
-
- vld1.u8 {d6}, [r0], r1 ;load u data
- vld1.u8 {d7}, [r2], r1 ;load v data
- vld1.u8 {d8}, [r0], r1
- vld1.u8 {d9}, [r2], r1
- vld1.u8 {d10}, [r0], r1
- vld1.u8 {d11}, [r2], r1
- vld1.u8 {d12}, [r0], r1
- vld1.u8 {d13}, [r2], r1
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d15}, [r2], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d17}, [r2], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d19}, [r2], r1
- vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r2], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
-
- ldr r12, _vlfuv_coeff_
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q4, q10, q9 ; abs(q3 - q2)
- vabd.u8 q9, q6, q7 ; abs(p0 - q0)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1
- vadd.u8 q0, q0, q0 ; flimit * 2
- vadd.u8 q0, q0, q1 ; flimit * 2 + limit
-
- vand q15, q15, q12
- vand q10, q10, q11
- vand q3, q3, q4
-
- vabd.u8 q2, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2
- vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
- vld1.u8 {q0}, [r12]!
-
- vand q15, q15, q10
-
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
- vld1.u8 {q10}, [r12]!
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q11, d15, d13
-
- vand q3, q3, q9
- vmovl.u8 q4, d20
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
- vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
- vmul.i16 q11, q11, q4
-
- vand q1, q1, q14 ; vp8_filter &= hev
- vand q15, q15, q3 ; q15: vp8_filter_mask
- ;;
- ;vld1.u8 {q4}, [r12]! ;the constant 7 is no longer needed
-
- ;vqadd.s8 q1, q1, q2
- vaddw.s8 q2, q2, d2
- vaddw.s8 q11, q11, d3
-
- vld1.u8 {q9}, [r12]!
- ;
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q11
- ;;
-
- vand q1, q1, q15 ; vp8_filter &= mask
- ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q1, q4 ; s = vp8_filter & 7
-; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
- ;;;;
-; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3
-; vceq.i8 q2, q2, q9 ; s = (s==4)*-1
- ;;
-; ;calculate output
-; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
- vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
- vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q1, q1, #3 ; Filter1 >>= 3
- ;calculate output
- vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
- vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1
-
- sub r0, r0, r1, lsl #3
- add r0, r0, #2
-
- vbic q1, q1, q14 ; vp8_filter &= ~hev
-
- sub r2, r2, r1, lsl #3
- add r2, r2, #2
-
- vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
- ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u)
- vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
- veor q7, q10, q0 ; *oq0 = u^0x80
- veor q5, q13, q0 ; *op1 = u^0x80
- veor q6, q11, q0 ; *op0 = u^0x80
- veor q8, q12, q0 ; *oq1 = u^0x80
-
- vswp d12, d11
- vswp d16, d13
- vswp d14, d12
- vswp d16, d15
-
- ;store op1, op0, oq0, oq1
- vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
- vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
- vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
- vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
- vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
- vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
- vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
- vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
- vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
- vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
- vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
- vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
- vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
- vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
- vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
- vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2], r1
-
- bx lr
- ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
-
-;-----------------
- AREA vloopfilteruv_dat, DATA, READWRITE ;read/write by default
-;Data section. Each DCD entry below reserves one 32-bit word in memory.
-;The label vlfuv_coeff can be used to access the data.
-;Data addresses: vlfuv_coeff, vlfuv_coeff+4, vlfuv_coeff+8, ...
-_vlfuv_coeff_
- DCD vlfuv_coeff
-vlfuv_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
-
- END
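
The vertical-edge filters in this patch load sixteen 8-pixel rows and then run the vtrn.32 / vtrn.16 / vtrn.8 cascade seen above. The cascade is an in-register byte transpose: the eight pixel columns straddling the vertical edge (p3..p0, q0..q3) become rows, so the same arithmetic used for horizontal edges applies unchanged. A scalar C sketch of the equivalent gather; the function name and layout are illustrative only, not part of libvpx:

    /* Gather the 8 pixel columns around a vertical edge into row-major
     * form, as the vld1 + vtrn sequence does entirely in registers. */
    static void gather_edge_columns(const unsigned char *src, int pitch,
                                    unsigned char cols[8][16])
    {
        int r, c;
        src -= 4;                   /* 4 pixels on each side of the edge */
        for (r = 0; r < 16; r++)    /* 16 lines along the edge */
            for (c = 0; c < 8; c++) /* p3, p2, p1, p0, q0, q1, q2, q3 */
                cols[c][r] = src[r * pitch + c];
    }
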
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
deleted file mode 100644
index 3a230a953..000000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,235 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_loop_filter_vertical_edge_y_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal, so only one load is needed
-;for flimit. The same applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5) int count --unused
-
-|vp8_loop_filter_vertical_edge_y_neon| PROC
- sub r0, r0, #4 ; move src pointer down by 4 columns
- ldr r12, [sp, #0] ; load thresh pointer
-
- vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
- vld1.s8 {d0[], d1[]}, [r2] ; flimit
- vld1.u8 {d8}, [r0], r1
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.u8 {d10}, [r0], r1
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vld1.u8 {d12}, [r0], r1
- ldr r12, _vlfy_coeff_
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d20}, [r0], r1
-
- vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
- vld1.u8 {d9}, [r0], r1
- vld1.u8 {d11}, [r0], r1
- vld1.u8 {d13}, [r0], r1
- vld1.u8 {d15}, [r0], r1
- vld1.u8 {d17}, [r0], r1
- vld1.u8 {d19}, [r0], r1
- vld1.u8 {d21}, [r0], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q4, q10, q9 ; abs(q3 - q2)
- vabd.u8 q9, q6, q7 ; abs(p0 - q0)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1
- vadd.u8 q0, q0, q0 ; flimit * 2
- vadd.u8 q0, q0, q1 ; flimit * 2 + limit
-
- vand q15, q15, q12
- vand q10, q10, q11
- vand q3, q3, q4
-
- vabd.u8 q2, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2
- vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
- vld1.u8 {q0}, [r12]!
-
- vand q15, q15, q10
-
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
- vld1.u8 {q10}, [r12]!
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q11, d15, d13
-
- vand q3, q3, q9
- vmovl.u8 q4, d20
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
- vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
- vmul.i16 q11, q11, q4
-
- vand q1, q1, q14 ; vp8_filter &= hev
- vand q15, q15, q3 ; q15: vp8_filter_mask
- ;;
- ;vld1.u8 {q4}, [r12]! ;the constant 7 is no longer needed
-
- ;vqadd.s8 q1, q1, q2
- vaddw.s8 q2, q2, d2
- vaddw.s8 q11, q11, d3
-
- vld1.u8 {q9}, [r12]!
- ;
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q11
- ;;
-
- vand q1, q1, q15 ; vp8_filter &= mask
- ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q1, q4 ; s = vp8_filter & 7
-; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
- ;;;;
-; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3
-; vceq.i8 q2, q2, q9 ; s = (s==4)*-1
- ;;
-; ;calculate output
-; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
- vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
- vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
- vshr.s8 q2, q2, #3 ; Filter2 >>= 3
- vshr.s8 q1, q1, #3 ; Filter1 >>= 3
- ;calculate output
- vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
- vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1
-
- sub r0, r0, r1, lsl #4
- add r0, r0, #2
- ;
-
- vbic q1, q1, q14 ; vp8_filter &= ~hev
- add r2, r0, r1
- ;
-
- vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
- ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u)
- vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
- veor q7, q10, q0 ; *oq0 = u^0x80
- veor q5, q13, q0 ; *op1 = u^0x80
- veor q6, q11, q0 ; *op0 = u^0x80
- veor q8, q12, q0 ; *oq1 = u^0x80
- add r3, r2, r1
- ;
- vswp d12, d11
- vswp d16, d13
- add r12, r3, r1
- vswp d14, d12
- vswp d16, d15
-
- ;store op1, op0, oq0, oq1
- vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0]
- vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r2]
- vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r3]
- vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
- add r0, r12, r1
- vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r12]
- vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
- add r2, r0, r1
- vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0]
- vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r2], r1
- add r3, r2, r1
- vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2]
- vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r3], r1
- add r12, r3, r1
- vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r3]
- vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
- add r0, r12, r1
- vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r12]
- vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
- add r2, r0, r1
- vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
- vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
-
- bx lr
- ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
-
-;-----------------
- AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
-;Data section. Each DCD entry below reserves one 32-bit word in memory.
-;The label vlfy_coeff can be used to access the data.
-;Data addresses: vlfy_coeff, vlfy_coeff+4, vlfy_coeff+8, ...
-_vlfy_coeff_
- DCD vlfy_coeff
-vlfy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
-
- END
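
Each filter in this patch opens with the same vabd / vcge / vcgt block, which builds two all-or-nothing byte masks: the filter mask (every neighbouring difference within limit, and the combined edge difference within flimit * 2 + limit) and the high-edge-variance (hev) mask. A scalar sketch of the conditions, written to mirror the NEON comparisons; treat it as illustrative rather than the project's reference implementation:

    #include <stdlib.h> /* abs */

    /* -1 (all bits set) means "filter this pixel", 0 means leave it. */
    static signed char filter_mask(unsigned char flimit, unsigned char limit,
                                   unsigned char p3, unsigned char p2,
                                   unsigned char p1, unsigned char p0,
                                   unsigned char q0, unsigned char q1,
                                   unsigned char q2, unsigned char q3)
    {
        signed char mask = -1;
        mask &= (abs(p3 - p2) <= limit) * -1;
        mask &= (abs(p2 - p1) <= limit) * -1;
        mask &= (abs(p1 - p0) <= limit) * -1;
        mask &= (abs(q1 - q0) <= limit) * -1;
        mask &= (abs(q2 - q1) <= limit) * -1;
        mask &= (abs(q3 - q2) <= limit) * -1;
        /* combined activity straight across the edge */
        mask &= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2
                 <= flimit * 2 + limit) * -1;
        return mask;
    }

    /* hev: high variance next to the edge selects the sharper filter. */
    static signed char hev_mask(unsigned char thresh,
                                unsigned char p1, unsigned char p0,
                                unsigned char q0, unsigned char q1)
    {
        return ((abs(p1 - p0) > thresh) | (abs(q1 - q0) > thresh)) * -1;
    }
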
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
new file mode 100644
index 000000000..255dd5619
--- /dev/null
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -0,0 +1,519 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon|
+ EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon|
+ EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
+ EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; int count)
+; r0 unsigned char *src,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 int count (unused)
+|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
+ stmdb sp!, {lr}
+ sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
+ ldr r12, [sp, #4] ; load thresh pointer
+
+ vld1.u8 {q3}, [r0], r1 ; p3
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ vld1.u8 {q4}, [r0], r1 ; p2
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ vld1.u8 {q5}, [r0], r1 ; p1
+ vld1.u8 {q6}, [r0], r1 ; p0
+ vld1.u8 {q7}, [r0], r1 ; q0
+ vld1.u8 {q8}, [r0], r1 ; q1
+ vld1.u8 {q9}, [r0], r1 ; q2
+ vld1.u8 {q10}, [r0], r1 ; q3
+
+ bl vp8_mbloop_filter_neon
+
+ sub r0, r0, r1, lsl #3
+ add r0, r0, r1
+ add r2, r0, r1
+ add r3, r2, r1
+
+ vst1.u8 {q4}, [r0] ; store op2
+ vst1.u8 {q5}, [r2] ; store op1
+ vst1.u8 {q6}, [r3], r1 ; store op0
+ add r12, r3, r1
+ vst1.u8 {q7}, [r3] ; store oq0
+ vst1.u8 {q8}, [r12], r1 ; store oq1
+ vst1.u8 {q9}, [r12] ; store oq2
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
+
+; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; unsigned char *v)
+; r0 unsigned char *u,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 unsigned char *v
+|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
+ stmdb sp!, {lr}
+ sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ ldr r3, [sp, #8] ; load v ptr
+ ldr r12, [sp, #4] ; load thresh pointer
+ sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
+
+ vld1.u8 {d6}, [r0], r1 ; p3
+ vld1.u8 {d7}, [r3], r1 ; p3
+ vld1.u8 {d8}, [r0], r1 ; p2
+ vld1.u8 {d9}, [r3], r1 ; p2
+ vld1.u8 {d10}, [r0], r1 ; p1
+ vld1.u8 {d11}, [r3], r1 ; p1
+ vld1.u8 {d12}, [r0], r1 ; p0
+ vld1.u8 {d13}, [r3], r1 ; p0
+ vld1.u8 {d14}, [r0], r1 ; q0
+ vld1.u8 {d15}, [r3], r1 ; q0
+ vld1.u8 {d16}, [r0], r1 ; q1
+ vld1.u8 {d17}, [r3], r1 ; q1
+ vld1.u8 {d18}, [r0], r1 ; q2
+ vld1.u8 {d19}, [r3], r1 ; q2
+ vld1.u8 {d20}, [r0], r1 ; q3
+ vld1.u8 {d21}, [r3], r1 ; q3
+
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+
+ bl vp8_mbloop_filter_neon
+
+ sub r0, r0, r1, lsl #3
+ sub r3, r3, r1, lsl #3
+
+ add r0, r0, r1
+ add r3, r3, r1
+
+ vst1.u8 {d8}, [r0], r1 ; store u op2
+ vst1.u8 {d9}, [r3], r1 ; store v op2
+ vst1.u8 {d10}, [r0], r1 ; store u op1
+ vst1.u8 {d11}, [r3], r1 ; store v op1
+ vst1.u8 {d12}, [r0], r1 ; store u op0
+ vst1.u8 {d13}, [r3], r1 ; store v op0
+ vst1.u8 {d14}, [r0], r1 ; store u oq0
+ vst1.u8 {d15}, [r3], r1 ; store v oq0
+ vst1.u8 {d16}, [r0], r1 ; store u oq1
+ vst1.u8 {d17}, [r3], r1 ; store v oq1
+ vst1.u8 {d18}, [r0], r1 ; store u oq2
+ vst1.u8 {d19}, [r3], r1 ; store v oq2
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
+
+; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; int count)
+; r0 unsigned char *src,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 int count (unused)
+|vp8_mbloop_filter_vertical_edge_y_neon| PROC
+ stmdb sp!, {lr}
+ sub r0, r0, #4 ; move src pointer down by 4 columns
+
+ vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
+ ldr r12, [sp, #4] ; load thresh pointer
+ vld1.u8 {d8}, [r0], r1
+ sub sp, sp, #32
+ vld1.u8 {d10}, [r0], r1
+ vld1.u8 {d12}, [r0], r1
+ vld1.u8 {d14}, [r0], r1
+ vld1.u8 {d16}, [r0], r1
+ vld1.u8 {d18}, [r0], r1
+ vld1.u8 {d20}, [r0], r1
+
+ vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
+ vld1.u8 {d9}, [r0], r1
+ vld1.u8 {d11}, [r0], r1
+ vld1.u8 {d13}, [r0], r1
+ vld1.u8 {d15}, [r0], r1
+ vld1.u8 {d17}, [r0], r1
+ vld1.u8 {d19}, [r0], r1
+ vld1.u8 {d21}, [r0], r1
+
+ ;transpose to 8x16 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ mov r12, sp
+ vst1.u8 {q3}, [r12]!
+ vst1.u8 {q10}, [r12]!
+
+ bl vp8_mbloop_filter_neon
+
+ sub r0, r0, r1, lsl #4
+
+ add r2, r0, r1
+
+ add r3, r2, r1
+
+ vld1.u8 {q3}, [sp]!
+ vld1.u8 {q10}, [sp]!
+
+ ;transpose to 16x8 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+ add r12, r3, r1
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ ;store op2, op1, op0, oq0, oq1, oq2
+ vst1.8 {d6}, [r0]
+ vst1.8 {d8}, [r2]
+ vst1.8 {d10}, [r3]
+ vst1.8 {d12}, [r12], r1
+ add r0, r12, r1
+ vst1.8 {d14}, [r12]
+ vst1.8 {d16}, [r0], r1
+ add r2, r0, r1
+ vst1.8 {d18}, [r0]
+ vst1.8 {d20}, [r2], r1
+ add r3, r2, r1
+ vst1.8 {d7}, [r2]
+ vst1.8 {d9}, [r3], r1
+ add r12, r3, r1
+ vst1.8 {d11}, [r3]
+ vst1.8 {d13}, [r12], r1
+ add r0, r12, r1
+ vst1.8 {d15}, [r12]
+ vst1.8 {d17}, [r0], r1
+ add r2, r0, r1
+ vst1.8 {d19}, [r0]
+ vst1.8 {d21}, [r2]
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
+
+; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+; const signed char *flimit,
+; const signed char *limit,
+; const signed char *thresh,
+; unsigned char *v)
+; r0 unsigned char *u,
+; r1 int pitch,
+; r2 const signed char *flimit,
+; r3 const signed char *limit,
+; sp const signed char *thresh,
+; sp+4 unsigned char *v
+|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
+ stmdb sp!, {lr}
+ sub r0, r0, #4 ; move src pointer down by 4 columns
+ vld1.s8 {d2[], d3[]}, [r3] ; limit
+ ldr r3, [sp, #8] ; load v ptr
+ ldr r12, [sp, #4] ; load thresh pointer
+
+ sub r3, r3, #4 ; move v pointer down by 4 columns
+
+ vld1.u8 {d6}, [r0], r1 ;load u data
+ vld1.u8 {d7}, [r3], r1 ;load v data
+ vld1.u8 {d8}, [r0], r1
+ vld1.u8 {d9}, [r3], r1
+ vld1.u8 {d10}, [r0], r1
+ vld1.u8 {d11}, [r3], r1
+ vld1.u8 {d12}, [r0], r1
+ vld1.u8 {d13}, [r3], r1
+ vld1.u8 {d14}, [r0], r1
+ vld1.u8 {d15}, [r3], r1
+ vld1.u8 {d16}, [r0], r1
+ vld1.u8 {d17}, [r3], r1
+ vld1.u8 {d18}, [r0], r1
+ vld1.u8 {d19}, [r3], r1
+ vld1.u8 {d20}, [r0], r1
+ vld1.u8 {d21}, [r3], r1
+
+ ;transpose to 8x16 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ sub sp, sp, #32
+ vld1.s8 {d4[], d5[]}, [r12] ; thresh
+ mov r12, sp
+ vst1.u8 {q3}, [r12]!
+ vst1.u8 {q10}, [r12]!
+
+ bl vp8_mbloop_filter_neon
+
+ sub r0, r0, r1, lsl #3
+ sub r3, r3, r1, lsl #3
+
+ vld1.u8 {q3}, [sp]!
+ vld1.u8 {q10}, [sp]!
+
+ ;transpose to 16x8 matrix
+ vtrn.32 q3, q7
+ vtrn.32 q4, q8
+ vtrn.32 q5, q9
+ vtrn.32 q6, q10
+
+ vtrn.16 q3, q5
+ vtrn.16 q4, q6
+ vtrn.16 q7, q9
+ vtrn.16 q8, q10
+
+ vtrn.8 q3, q4
+ vtrn.8 q5, q6
+ vtrn.8 q7, q8
+ vtrn.8 q9, q10
+
+ ;store op2, op1, op0, oq0, oq1, oq2
+ vst1.8 {d6}, [r0], r1
+ vst1.8 {d7}, [r3], r1
+ vst1.8 {d8}, [r0], r1
+ vst1.8 {d9}, [r3], r1
+ vst1.8 {d10}, [r0], r1
+ vst1.8 {d11}, [r3], r1
+ vst1.8 {d12}, [r0], r1
+ vst1.8 {d13}, [r3], r1
+ vst1.8 {d14}, [r0], r1
+ vst1.8 {d15}, [r3], r1
+ vst1.8 {d16}, [r0], r1
+ vst1.8 {d17}, [r3], r1
+ vst1.8 {d18}, [r0], r1
+ vst1.8 {d19}, [r3], r1
+ vst1.8 {d20}, [r0], r1
+ vst1.8 {d21}, [r3], r1
+
+ ldmia sp!, {pc}
+ ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
+
+; void vp8_mbloop_filter_neon()
+; This is a helper function for the macroblock loopfilters. The individual
+; functions do the required loads and stores, plus the transpose and
+; register preservation where needed.
+
+; TODO:
+; The vertical filter writes p3/q3 back out because two 4-element writes are
+; much simpler than ordering and writing two 3-element sets (or three
+; 2-element sets, or any other possible combination).
+; If we can preserve q3 and q10, the vertical filter will be able to avoid
+; storing those values on the stack and reading them back after the filter.
+
+; r0,r1 PRESERVE
+; r2 flimit
+; r3 PRESERVE
+; q1 limit
+; q2 thresh
+; q3 p3
+; q4 p2
+; q5 p1
+; q6 p0
+; q7 q0
+; q8 q1
+; q9 q2
+; q10 q3
+
+|vp8_mbloop_filter_neon| PROC
+ ldr r12, _mblf_coeff_
+
+ ; vp8_filter_mask
+ vabd.u8 q11, q3, q4 ; abs(p3 - p2)
+ vabd.u8 q12, q4, q5 ; abs(p2 - p1)
+ vabd.u8 q13, q5, q6 ; abs(p1 - p0)
+ vabd.u8 q14, q8, q7 ; abs(q1 - q0)
+ vabd.u8 q3, q9, q8 ; abs(q2 - q1)
+ vabd.u8 q0, q10, q9 ; abs(q3 - q2)
+
+ vmax.u8 q11, q11, q12
+ vmax.u8 q12, q13, q14
+ vmax.u8 q3, q3, q0
+ vmax.u8 q15, q11, q12
+
+ vabd.u8 q12, q6, q7 ; abs(p0 - q0)
+
+ ; vp8_hevmask
+ vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
+ vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
+ vmax.u8 q15, q15, q3
+
+ vld1.s8 {d4[], d5[]}, [r2] ; flimit
+
+ vld1.u8 {q0}, [r12]!
+
+ vadd.u8 q2, q2, q2 ; flimit * 2
+ vadd.u8 q2, q2, q1 ; flimit * 2 + limit
+ vcge.u8 q15, q1, q15
+
+ vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
+ vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
+ vshr.u8 q1, q1, #1 ; a = a / 2
+ vqadd.u8 q12, q12, q1 ; a = b + a
+ vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
+
+ ; vp8_filter
+ ; convert to signed
+ veor q7, q7, q0 ; qs0
+ veor q6, q6, q0 ; ps0
+ veor q5, q5, q0 ; ps1
+ veor q8, q8, q0 ; qs1
+ veor q4, q4, q0 ; ps2
+ veor q9, q9, q0 ; qs2
+
+ vorr q14, q13, q14 ; vp8_hevmask
+
+ vsubl.s8 q2, d14, d12 ; qs0 - ps0
+ vsubl.s8 q13, d15, d13
+
+ vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
+
+ vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
+ vadd.s16 q11, q13, q13
+ vand q15, q15, q12 ; vp8_filter_mask
+
+ vadd.s16 q2, q2, q10
+ vadd.s16 q13, q13, q11
+
+ vld1.u8 {q12}, [r12]! ; #3
+
+ vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
+ vaddw.s8 q13, q13, d3
+
+ vld1.u8 {q11}, [r12]! ; #4
+
+ ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+ vqmovn.s16 d2, q2
+ vqmovn.s16 d3, q13
+
+ vand q1, q1, q15 ; vp8_filter &= mask
+
+ vld1.u8 {q15}, [r12]! ; #63
+ ;
+ vand q13, q1, q14 ; Filter2 &= hev
+
+ vld1.u8 {d7}, [r12]! ; #9
+
+ vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
+ vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
+
+ vld1.u8 {d6}, [r12]! ; #18
+
+ vshr.s8 q2, q2, #3 ; Filter1 >>= 3
+ vshr.s8 q13, q13, #3 ; Filter2 >>= 3
+
+ vmov q10, q15
+ vmov q12, q15
+
+ vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
+
+ vld1.u8 {d5}, [r12]! ; #27
+
+ vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
+
+ vbic q1, q1, q14 ; vp8_filter &= ~hev
+
+ ; roughly 1/7th difference across boundary
+ ; roughly 2/7th difference across boundary
+ ; roughly 3/7th difference across boundary
+ vmov q11, q15
+ vmov q13, q15
+ vmov q14, q15
+
+ vmlal.s8 q10, d2, d7 ; Filter2 * 9
+ vmlal.s8 q11, d3, d7
+ vmlal.s8 q12, d2, d6 ; Filter2 * 18
+ vmlal.s8 q13, d3, d6
+ vmlal.s8 q14, d2, d5 ; Filter2 * 27
+ vmlal.s8 q15, d3, d5
+ vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
+ vqshrn.s16 d21, q11, #7
+ vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
+ vqshrn.s16 d25, q13, #7
+ vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
+ vqshrn.s16 d29, q15, #7
+
+ vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
+ vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
+ vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
+ vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
+ vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
+ vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
+ veor q9, q11, q0 ; *oq2 = s^0x80
+ veor q4, q10, q0 ; *op2 = s^0x80
+ veor q8, q13, q0 ; *oq1 = s^0x80
+ veor q5, q12, q0 ; *op1 = s^0x80
+ veor q7, q15, q0 ; *oq0 = s^0x80
+ veor q6, q14, q0 ; *op0 = s^0x80
+
+ bx lr
+ ENDP ; |vp8_mbloop_filter_neon|
+
+ AREA mbloopfilter_dat, DATA, READONLY
+_mblf_coeff_
+ DCD mblf_coeff
+mblf_coeff
+ DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
+ DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
+ DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
+ DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
+ DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
+ DCD 0x1b1b1b1b, 0x1b1b1b1b
+
+ END
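
For reference, the arithmetic vp8_mbloop_filter_neon performs once the masks are built can be written in scalar C roughly as below. The 27, 18 and 9 tap weights, with the +63 rounding bias and a shift by 7, move each side of the edge by about 27/128, 18/128 and 9/128 of the filter value; since both sides move, the change across the boundary is roughly 3/7, 2/7 and 1/7, which is what the comments in the code mean. A sketch only, assuming clamp() is signed-char saturation and an arithmetic right shift (true on ARM); pixels were already XORed with 0x80 to make them signed:

    static signed char clamp(int v)
    {
        return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    static void mb_filter_tail(signed char mask, signed char hev,
                               signed char *ps2, signed char *ps1,
                               signed char *ps0, signed char *qs0,
                               signed char *qs1, signed char *qs2)
    {
        signed char w = clamp(clamp(*ps1 - *qs1) + 3 * (*qs0 - *ps0)) & mask;
        signed char f = w & hev;         /* sharp part: p0/q0 only */
        signed char f1 = clamp(f + 4) >> 3;
        signed char f2 = clamp(f + 3) >> 3;
        signed char u;

        *qs0 = clamp(*qs0 - f1);
        *ps0 = clamp(*ps0 + f2);

        w &= ~hev;                       /* smooth part: three taps */
        u = clamp((63 + w * 27) >> 7);   /* ~3/7 across the boundary */
        *qs0 = clamp(*qs0 - u);
        *ps0 = clamp(*ps0 + u);
        u = clamp((63 + w * 18) >> 7);   /* ~2/7 */
        *qs1 = clamp(*qs1 - u);
        *ps1 = clamp(*ps1 + u);
        u = clamp((63 + w * 9) >> 7);    /* ~1/7 */
        *qs2 = clamp(*qs2 - u);
        *ps2 = clamp(*ps2 + u);
    }
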
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
deleted file mode 100644
index 86eddaa2e..000000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,257 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal, so only one load is needed
-;for flimit. The same applies to limit and thresh.
-; r0 unsigned char *u,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
- sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- ldr r3, [sp, #4] ; load v ptr
- ldr r12, [sp, #0] ; load thresh pointer
- sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
-
- vld1.u8 {d6}, [r0], r1 ; p3
- vld1.u8 {d7}, [r3], r1 ; p3
- vld1.u8 {d8}, [r0], r1 ; p2
- vld1.u8 {d9}, [r3], r1 ; p2
- vld1.u8 {d10}, [r0], r1 ; p1
- vld1.u8 {d11}, [r3], r1 ; p1
- vld1.u8 {d12}, [r0], r1 ; p0
- vld1.u8 {d13}, [r3], r1 ; p0
- vld1.u8 {d14}, [r0], r1 ; q0
- vld1.u8 {d15}, [r3], r1 ; q0
- vld1.u8 {d16}, [r0], r1 ; q1
- vld1.u8 {d17}, [r3], r1 ; q1
- vld1.u8 {d18}, [r0], r1 ; q2
- vld1.u8 {d19}, [r3], r1 ; q2
- vld1.u8 {d20}, [r0], r1 ; q3
- vld1.u8 {d21}, [r3], r1 ; q3
-
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
-
- ldr r12, _mbhlfuv_coeff_
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q0, q10, q9 ; abs(q3 - q2)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1
-
- vand q15, q15, q12
-
- vabd.u8 q12, q6, q7 ; abs(p0 - q0)
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vld1.s8 {d4[], d5[]}, [r2] ; flimit
-
- vand q10, q10, q11
- vand q3, q3, q0
-
- vld1.u8 {q0}, [r12]!
-
- vadd.u8 q2, q2, q2 ; flimit * 2
- vadd.u8 q2, q2, q1 ; flimit * 2 + limit
-
- vabd.u8 q1, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2
- vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
- veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value
- veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q13, d15, d13
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
- ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q11, q13, q13
-
- vand q3, q3, q12
-
- ;vadd.s8 q2, q2, q10
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q11
-
- vld1.u8 {q12}, [r12]! ;#3
-
- ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
-
- vand q15, q15, q3 ; q15: vp8_filter_mask
- vld1.u8 {q11}, [r12]! ;#4
-
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q13
-
-;;;;;;;;;;;;;;
- vand q1, q1, q15 ; vp8_filter &= mask
-
- vld1.u8 {q15}, [r12]! ;#63
- ;
- vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev
-
- vld1.u8 {d7}, [r12]! ;#9
- ;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q13, q12 ; s = Filter2 & 7
-
-; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-; vld1.u8 {d6}, [r12]! ;#18
-
-; sub r0, r0, r1, lsl #3
-; sub r3, r3, r1, lsl #3
-
-; vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-; vceq.i8 q2, q2, q11 ; s = (s==4)*-1
-
-; add r0, r0, r1
-; add r3, r3, r1
-
-; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2)
-
-; vld1.u8 {d5}, [r12]! ;#27
-; vmov q10, q15
-; vmov q12, q15
-
-; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4)
- vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
- vld1.u8 {d6}, [r12]! ;#18
-
- sub r0, r0, r1, lsl #3
- sub r3, r3, r1, lsl #3
-
- vshr.s8 q2, q2, #3 ; Filter1 >>= 3
- vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-
- vmov q10, q15
- vmov q12, q15
-
- vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
- vld1.u8 {d5}, [r12]! ;#27
-
- add r0, r0, r1
- add r3, r3, r1
-
- vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
- ; roughly 1/7th difference across boundary
- ; roughly 2/7th difference across boundary
- ; roughly 3/7th difference across boundary
- vmov q11, q15
- vmov q13, q15
- vmov q14, q15
-
- vmlal.s8 q10, d2, d7 ; Filter2 * 9
- vmlal.s8 q11, d3, d7
- vmlal.s8 q12, d2, d6 ; Filter2 * 18
- vmlal.s8 q13, d3, d6
- vmlal.s8 q14, d2, d5 ; Filter2 * 27
- vmlal.s8 q15, d3, d5
- vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d21, q11, #7
- vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
- vqshrn.s16 d25, q13, #7
- vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
- vqshrn.s16 d29, q15, #7
-
- vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u)
- vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u)
- vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u)
- vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u)
- vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u)
- vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u)
- veor q9, q11, q0 ; *oq2 = s^0x80
- veor q4, q10, q0 ; *op2 = s^0x80
- veor q8, q13, q0 ; *oq1 = s^0x80
- veor q5, q12, q0 ; *op1 = s^0x80
- veor q7, q15, q0 ; *oq0 = s^0x80
- veor q6, q14, q0 ; *op0 = s^0x80
-
- vst1.u8 {d8}, [r0], r1 ; store u op2
- vst1.u8 {d9}, [r3], r1 ; store v op2
- vst1.u8 {d10}, [r0], r1 ; store u op1
- vst1.u8 {d11}, [r3], r1 ; store v op1
- vst1.u8 {d12}, [r0], r1 ; store u op0
- vst1.u8 {d13}, [r3], r1 ; store v op0
- vst1.u8 {d14}, [r0], r1 ; store u oq0
- vst1.u8 {d15}, [r3], r1 ; store v oq0
- vst1.u8 {d16}, [r0], r1 ; store u oq1
- vst1.u8 {d17}, [r3], r1 ; store v oq1
- vst1.u8 {d18}, [r0], r1 ; store u oq2
- vst1.u8 {d19}, [r3], r1 ; store v oq2
-
- bx lr
- ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-;-----------------
- AREA mbhloopfilteruv_dat, DATA, READWRITE ;read/write by default
-;Data section. Each DCD entry below reserves one 32-bit word in memory.
-;The label mbhlfuv_coeff can be used to access the data.
-;Data addresses: mbhlfuv_coeff, mbhlfuv_coeff+4, mbhlfuv_coeff+8, ...
-_mbhlfuv_coeff_
- DCD mbhlfuv_coeff
-mbhlfuv_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
- DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
- DCD 0x1b1b1b1b, 0x1b1b1b1b
-
- END
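
The UV variants interleave the two chroma planes: each pair of vld1 loads puts 8 U pixels in the even d register and 8 V pixels in the odd one, so every q register holds a 16-lane row and a single filter pass covers both planes. An illustrative sketch of the packing; the type and function are hypothetical, for exposition only:

    /* Pack one 8-pixel row of U and one of V into a 16-lane vector,
     * mirroring how {d6} (U) and {d7} (V) together form q3 above. */
    typedef struct { unsigned char lanes[16]; } uv_row;

    static uv_row pack_uv_row(const unsigned char *u, const unsigned char *v)
    {
        uv_row row;
        int i;
        for (i = 0; i < 8; i++) {
            row.lanes[i]     = u[i];  /* low half:  U plane */
            row.lanes[8 + i] = v[i];  /* high half: V plane */
        }
        return row;
    }
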
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
deleted file mode 100644
index 2ab0fc240..000000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,236 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal, so only one load is needed
-;for flimit. The same applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5) int count --unused
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
- sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
- ldr r12, [sp, #0] ; load thresh pointer
-
- vld1.u8 {q3}, [r0], r1 ; p3
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vld1.u8 {q4}, [r0], r1 ; p2
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vld1.u8 {q5}, [r0], r1 ; p1
- ldr r12, _mbhlfy_coeff_
- vld1.u8 {q6}, [r0], r1 ; p0
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vld1.u8 {q7}, [r0], r1 ; q0
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vld1.u8 {q8}, [r0], r1 ; q1
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vld1.u8 {q9}, [r0], r1 ; q2
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vld1.u8 {q10}, [r0], r1 ; q3
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q0, q10, q9 ; abs(q3 - q2)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1
-
- vand q15, q15, q12
-
- vabd.u8 q12, q6, q7 ; abs(p0 - q0)
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vld1.s8 {d4[], d5[]}, [r2] ; flimit
-
- vand q10, q10, q11
- vand q3, q3, q0
-
- vld1.u8 {q0}, [r12]!
-
- vadd.u8 q2, q2, q2 ; flimit * 2
- vadd.u8 q2, q2, q1 ; flimit * 2 + limit
-
- vabd.u8 q1, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2
- vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
- veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value
- veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q13, d15, d13
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
- ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q11, q13, q13
-
- vand q3, q3, q12
-
- ;vadd.s8 q2, q2, q10
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q11
-
- vld1.u8 {q12}, [r12]! ;#3
-
- ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
-
- vand q15, q15, q3 ; q15: vp8_filter_mask
- vld1.u8 {q11}, [r12]! ;#4
-
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q13
-
-;;;;;;;;;;;;;;
- vand q1, q1, q15 ; vp8_filter &= mask
-
- vld1.u8 {q15}, [r12]! ;#63
- ;
- vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev
-
- vld1.u8 {d7}, [r12]! ;#9
- sub r0, r0, r1, lsl #3
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q13, q12 ; s = Filter2 & 7
-
-; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-; vld1.u8 {d6}, [r12]! ;#18
-
-; add r0, r0, r1
-; add r2, r0, r1
-
-; vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-; vceq.i8 q2, q2, q11 ; s = (s==4)*-1
-
-; add r3, r2, r1
-
-; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2)
-
-; vld1.u8 {d5}, [r12]! ;#27
-; vmov q10, q15
-; vmov q12, q15
-
-; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4)
- vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
- vld1.u8 {d6}, [r12]! ;#18
- add r0, r0, r1
- add r2, r0, r1
-
- vshr.s8 q2, q2, #3 ; Filter1 >>= 3
- vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-
- vmov q10, q15
- vmov q12, q15
-
- vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
- vld1.u8 {d5}, [r12]! ;#27
- add r3, r2, r1
-
- vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
- ; roughly 1/7th difference across boundary
- ; roughly 2/7th difference across boundary
- ; roughly 3/7th difference across boundary
- vmov q11, q15
- vmov q13, q15
- vmov q14, q15
-
- vmlal.s8 q10, d2, d7 ; Filter2 * 9
- vmlal.s8 q11, d3, d7
- vmlal.s8 q12, d2, d6 ; Filter2 * 18
- vmlal.s8 q13, d3, d6
- vmlal.s8 q14, d2, d5 ; Filter2 * 27
- vmlal.s8 q15, d3, d5
- vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d21, q11, #7
- vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
- vqshrn.s16 d25, q13, #7
- vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
- vqshrn.s16 d29, q15, #7
-
- vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u)
- vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u)
- vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u)
- vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u)
- vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u)
- vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u)
- veor q9, q11, q0 ; *oq2 = s^0x80
- veor q4, q10, q0 ; *op2 = s^0x80
- veor q5, q12, q0 ; *op1 = s^0x80
- veor q6, q14, q0 ; *op0 = s^0x80
- veor q8, q13, q0 ; *oq1 = s^0x80
- veor q7, q15, q0 ; *oq0 = s^0x80
-
- vst1.u8 {q4}, [r0] ; store op2
- vst1.u8 {q5}, [r2] ; store op1
- vst1.u8 {q6}, [r3], r1 ; store op0
- add r12, r3, r1
- vst1.u8 {q7}, [r3] ; store oq0
- vst1.u8 {q8}, [r12], r1 ; store oq1
- vst1.u8 {q9}, [r12] ; store oq2
-
- bx lr
- ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-;-----------------
- AREA mbhloopfiltery_dat, DATA, READWRITE ;read/write by default
-;Data section. Each DCD entry below reserves one 32-bit word in memory.
-;The label mbhlfy_coeff can be used to access the data.
-;Data addresses: mbhlfy_coeff, mbhlfy_coeff+4, mbhlfy_coeff+8, ...
-_mbhlfy_coeff_
- DCD mbhlfy_coeff
-mbhlfy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
- DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
- DCD 0x1b1b1b1b, 0x1b1b1b1b
-
- END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
deleted file mode 100644
index ad5afba34..000000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,296 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal, so only one load is needed
-;for flimit. The same applies to limit and thresh.
-; r0 unsigned char *u,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
- sub r0, r0, #4 ; move src pointer down by 4 columns
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- ldr r3, [sp, #4] ; load v ptr
- ldr r12, [sp, #0] ; load thresh pointer
-
- sub r3, r3, #4 ; move v pointer down by 4 columns
-
- vld1.u8 {d6}, [r0], r1 ;load u data
- vld1.u8 {d7}, [r3], r1 ;load v data
- vld1.u8 {d8}, [r0], r1
- vld1.u8 {d9}, [r3], r1
- vld1.u8 {d10}, [r0], r1
- vld1.u8 {d11}, [r3], r1
- vld1.u8 {d12}, [r0], r1
- vld1.u8 {d13}, [r3], r1
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d15}, [r3], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d17}, [r3], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d19}, [r3], r1
- vld1.u8 {d20}, [r0], r1
- vld1.u8 {d21}, [r3], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- sub sp, sp, #32
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- vst1.u8 {q3}, [sp]!
- ldr r12, _mbvlfuv_coeff_
- vst1.u8 {q10}, [sp]!
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q0, q10, q9 ; abs(q3 - q2)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1
-
- vand q15, q15, q12
-
- vabd.u8 q12, q6, q7 ; abs(p0 - q0)
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vld1.s8 {d4[], d5[]}, [r2] ; flimit
-
- vand q10, q10, q11
- vand q3, q3, q0
-
- vld1.u8 {q0}, [r12]!
-
- vadd.u8 q2, q2, q2 ; flimit * 2
- vadd.u8 q2, q2, q1 ; flimit * 2 + limit
-
- vabd.u8 q1, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2
- vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
- veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value
- veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q13, d15, d13
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
- ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q11, q13, q13
-
- vand q3, q3, q12
-
- ;vadd.s8 q2, q2, q10
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q11
-
- vld1.u8 {q12}, [r12]! ;#3
-
- ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
-
- vand q15, q15, q3 ; q15: vp8_filter_mask
- vld1.u8 {q11}, [r12]! ;#4
-
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q13
-
-;;;;;;;;;;;;;;
- vand q1, q1, q15 ; vp8_filter &= mask
-
- vld1.u8 {q15}, [r12]! ;#63
- ;
- vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev
-
- vld1.u8 {d7}, [r12]! ;#9
- ;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q13, q12 ; s = Filter2 & 7
-
-; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-; vld1.u8 {d6}, [r12]! ;#18
-
-; sub r0, r0, r1, lsl #3
-; sub r3, r3, r1, lsl #3
-; sub sp, sp, #32
-
-; vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-; vceq.i8 q2, q2, q11 ; s = (s==4)*-1
-
-; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2)
-
-; vld1.u8 {d5}, [r12]! ;#27
-; vmov q10, q15
-; vmov q12, q15
-
-; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4)
- vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
- vld1.u8 {d6}, [r12]! ;#18
-
- sub r0, r0, r1, lsl #3
- sub r3, r3, r1, lsl #3
-
- vshr.s8 q2, q2, #3 ; Filter1 >>= 3
- vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-
- vmov q10, q15
- vmov q12, q15
-
- vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
- vld1.u8 {d5}, [r12]! ;#27
-
- sub sp, sp, #32
-
- vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
- ; roughly 1/7th difference across boundary
- ; roughly 2/7th difference across boundary
- ; roughly 3/7th difference across boundary
- vmov q11, q15
- vmov q13, q15
- vmov q14, q15
-
- vmlal.s8 q10, d2, d7 ; Filter2 * 9
- vmlal.s8 q11, d3, d7
- vmlal.s8 q12, d2, d6 ; Filter2 * 18
- vmlal.s8 q13, d3, d6
- vmlal.s8 q14, d2, d5 ; Filter2 * 27
- vmlal.s8 q15, d3, d5
- vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d21, q11, #7
- vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
- vqshrn.s16 d25, q13, #7
- vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
- vqshrn.s16 d29, q15, #7
-
- vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u)
- vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u)
- vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u)
- vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u)
- vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u)
- vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u)
- veor q9, q11, q0 ; *oq2 = s^0x80
- veor q4, q10, q0 ; *op2 = s^0x80
- veor q8, q13, q0 ; *oq1 = s^0x80
- veor q5, q12, q0 ; *op1 = s^0x80
- veor q7, q15, q0 ; *oq0 = s^0x80
- vld1.u8 {q3}, [sp]!
- veor q6, q14, q0 ; *op0 = s^0x80
- vld1.u8 {q10}, [sp]!
-
- ;transpose to 16x8 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- ;store op2, op1, op0, oq0, oq1, oq2
- vst1.8 {d6}, [r0], r1
- vst1.8 {d7}, [r3], r1
- vst1.8 {d8}, [r0], r1
- vst1.8 {d9}, [r3], r1
- vst1.8 {d10}, [r0], r1
- vst1.8 {d11}, [r3], r1
- vst1.8 {d12}, [r0], r1
- vst1.8 {d13}, [r3], r1
- vst1.8 {d14}, [r0], r1
- vst1.8 {d15}, [r3], r1
- vst1.8 {d16}, [r0], r1
- vst1.8 {d17}, [r3], r1
- vst1.8 {d18}, [r0], r1
- vst1.8 {d19}, [r3], r1
- vst1.8 {d20}, [r0], r1
- vst1.8 {d21}, [r3], r1
-
- bx lr
- ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-;-----------------
- AREA mbvloopfilteruv_dat, DATA, READWRITE ;read/write by default
-;Data section. Each DCD entry below reserves one 32-bit word in memory.
-;The label mbvlfuv_coeff can be used to access the data.
-;Data addresses: mbvlfuv_coeff, mbvlfuv_coeff+4, mbvlfuv_coeff+8, ...
-_mbvlfuv_coeff_
- DCD mbvlfuv_coeff
-mbvlfuv_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
- DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
- DCD 0x1b1b1b1b, 0x1b1b1b1b
-
- END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
deleted file mode 100644
index 60e517519..000000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,303 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal, so only one load is needed
-;for flimit. The same applies to limit and thresh.
-; r0 unsigned char *s,
-; r1 int p, //pitch
-; r2 const signed char *flimit,
-; r3 const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5) int count --unused
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
- sub r0, r0, #4 ; move src pointer down by 4 columns
-
- vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
- ldr r12, [sp, #0] ; load thresh pointer
- vld1.u8 {d8}, [r0], r1
- sub sp, sp, #32
- vld1.u8 {d10}, [r0], r1
- vld1.u8 {d12}, [r0], r1
- vld1.u8 {d14}, [r0], r1
- vld1.u8 {d16}, [r0], r1
- vld1.u8 {d18}, [r0], r1
- vld1.u8 {d20}, [r0], r1
-
- vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
- vld1.u8 {d9}, [r0], r1
- vld1.u8 {d11}, [r0], r1
- vld1.u8 {d13}, [r0], r1
- vld1.u8 {d15}, [r0], r1
- vld1.u8 {d17}, [r0], r1
- vld1.u8 {d19}, [r0], r1
- vld1.u8 {d21}, [r0], r1
-
- ;transpose to 8x16 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- vld1.s8 {d2[], d3[]}, [r3] ; limit
- vst1.u8 {q3}, [sp]!
- vld1.s8 {d4[], d5[]}, [r12] ; thresh
- ldr r12, _mbvlfy_coeff_
- vst1.u8 {q10}, [sp]!
-
- ;vp8_filter_mask() function
- ;vp8_hevmask() function
- vabd.u8 q11, q3, q4 ; abs(p3 - p2)
- vabd.u8 q12, q4, q5 ; abs(p2 - p1)
- vabd.u8 q13, q5, q6 ; abs(p1 - p0)
- vabd.u8 q14, q8, q7 ; abs(q1 - q0)
- vabd.u8 q3, q9, q8 ; abs(q2 - q1)
- vabd.u8 q0, q10, q9 ; abs(q3 - q2)
-
- vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1
- vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1
- vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1
- vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1
- vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1
- vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1
-
- vand q15, q15, q12
-
- vabd.u8 q12, q6, q7 ; abs(p0 - q0)
-
- vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
- vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
-
- vld1.s8 {d4[], d5[]}, [r2] ; flimit
-
- vand q10, q10, q11
- vand q3, q3, q0
-
- vld1.u8 {q0}, [r12]!
-
- vadd.u8 q2, q2, q2 ; flimit * 2
- vadd.u8 q2, q2, q1 ; flimit * 2 + limit
-
- vabd.u8 q1, q5, q8 ; abs(p1 - q1)
- vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2
- vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2
- vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
- vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
- vand q15, q15, q10
-
- ;vp8_filter() function
- veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
- veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
- veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
- veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
- veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value
- veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
- vorr q14, q13, q14 ; q14: vp8_hevmask
-
- ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
- vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
- vsubl.s8 q13, d15, d13
-
- vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
- ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
- vadd.s16 q11, q13, q13
-
- vand q3, q3, q12
-
- ;vadd.s8 q2, q2, q10
- vadd.s16 q2, q2, q10
- vadd.s16 q13, q13, q11
-
- vld1.u8 {q12}, [r12]! ;#3
-
- ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
- vaddw.s8 q13, q13, d3
-
- vand q15, q15, q3 ; q15: vp8_filter_mask
- vld1.u8 {q11}, [r12]! ;#4
-
- vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
- vqmovn.s16 d3, q13
-
-;;;;;;;;;;;;;;
- vand q1, q1, q15 ; vp8_filter &= mask
-
- vld1.u8 {q15}, [r12]! ;#63
- ;
- vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev
-
- vld1.u8 {d7}, [r12]! ;#9
- ;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-; vand q2, q13, q12 ; s = Filter2 & 7
-
-; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-; vld1.u8 {d6}, [r12]! ;#18
-
-; sub r0, r0, r1, lsl #4
-; sub sp, sp, #32
-; add r2, r0, r1
-
-; vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-; vceq.i8 q2, q2, q11 ; s = (s==4)*-1
-
-; add r3, r2, r1
-
-; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2)
-
-; vld1.u8 {d5}, [r12]! ;#27
-; vmov q10, q15
-; vmov q12, q15
-
-; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4)
- vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
- vld1.u8 {d6}, [r12]! ;#18
- sub r0, r0, r1, lsl #4
- sub sp, sp, #32
-
- add r2, r0, r1
-
- vshr.s8 q2, q2, #3 ; Filter1 >>= 3
- vshr.s8 q13, q13, #3 ; Filter2 >>= 3
-
- vmov q10, q15
- vmov q12, q15
-
- vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
- vld1.u8 {d5}, [r12]! ;#27
- add r3, r2, r1
-
- vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
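-
- ; scalar sketch of the inner taps just applied (w = vp8_filter & hev,
- ; so only high-variance lanes change):
- ; Filter1 = clamp(w + 4) >> 3 ; qs0 = clamp(qs0 - Filter1)
- ; Filter2 = clamp(w + 3) >> 3 ; ps0 = clamp(ps0 + Filter2)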
-
- vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
- ; roughly 1/7th difference across boundary
- ; roughly 2/7th difference across boundary
- ; roughly 3/7th difference across boundary
- vmov q11, q15
- vmov q13, q15
- vmov q14, q15
-
- vmlal.s8 q10, d2, d7 ; Filter2 * 9
- vmlal.s8 q11, d3, d7
- vmlal.s8 q12, d2, d6 ; Filter2 * 18
- vmlal.s8 q13, d3, d6
- vmlal.s8 q14, d2, d5 ; Filter2 * 27
- vmlal.s8 q15, d3, d5
- vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
- vqshrn.s16 d21, q11, #7
- vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
- vqshrn.s16 d25, q13, #7
- vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
- vqshrn.s16 d29, q15, #7
-
- vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u)
- vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u)
- vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u)
- vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u)
- vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u)
- vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u)
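-
- ; scalar sketch of the wide taps (w = vp8_filter & ~hev, so hev lanes
- ; are left alone):
- ; u = clamp((63 + 27*w) >> 7) ; ps0 = clamp(ps0 + u) ; qs0 = clamp(qs0 - u)
- ; u = clamp((63 + 18*w) >> 7) ; ps1 = clamp(ps1 + u) ; qs1 = clamp(qs1 - u)
- ; u = clamp((63 + 9*w) >> 7) ; ps2 = clamp(ps2 + u) ; qs2 = clamp(qs2 - u)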
- veor q9, q11, q0 ; *oq2 = s^0x80
- veor q4, q10, q0 ; *op2 = s^0x80
- veor q8, q13, q0 ; *oq1 = s^0x80
- veor q5, q12, q0 ; *op1 = s^0x80
- veor q7, q15, q0 ; *oq0 = s^0x80
- vld1.u8 {q3}, [sp]!
- veor q6, q14, q0 ; *op0 = s^0x80
- vld1.u8 {q10}, [sp]!
-
- ;transpose to 16x8 matrix
- vtrn.32 q3, q7
- vtrn.32 q4, q8
- vtrn.32 q5, q9
- vtrn.32 q6, q10
- add r12, r3, r1
-
- vtrn.16 q3, q5
- vtrn.16 q4, q6
- vtrn.16 q7, q9
- vtrn.16 q8, q10
-
- vtrn.8 q3, q4
- vtrn.8 q5, q6
- vtrn.8 q7, q8
- vtrn.8 q9, q10
-
- ;store the 16 filtered rows (8 pixels each, with op2..oq2 updated)
- vst1.8 {d6}, [r0]
- vst1.8 {d8}, [r2]
- vst1.8 {d10}, [r3]
- vst1.8 {d12}, [r12], r1
- add r0, r12, r1
- vst1.8 {d14}, [r12]
- vst1.8 {d16}, [r0], r1
- add r2, r0, r1
- vst1.8 {d18}, [r0]
- vst1.8 {d20}, [r2], r1
- add r3, r2, r1
- vst1.8 {d7}, [r2]
- vst1.8 {d9}, [r3], r1
- add r12, r3, r1
- vst1.8 {d11}, [r3]
- vst1.8 {d13}, [r12], r1
- add r0, r12, r1
- vst1.8 {d15}, [r12]
- vst1.8 {d17}, [r0], r1
- add r2, r0, r1
- vst1.8 {d19}, [r0]
- vst1.8 {d21}, [r2]
-
- bx lr
- ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-;-----------------
- AREA mbvloopfiltery_dat, DATA, READWRITE ;read/write by default
-;A data section named mbvloopfiltery_dat is declared. Each DCD directive below
-;reserves one word of memory; the label mbvlfy_coeff can be used to access the
-;data at addresses mbvlfy_coeff, mbvlfy_coeff+4, mbvlfy_coeff+8 ...
-_mbvlfy_coeff_
- DCD mbvlfy_coeff
-mbvlfy_coeff
- DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
- DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
- DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
- DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
- DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
- DCD 0x1b1b1b1b, 0x1b1b1b1b
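-;constants above: 0x80 is the signed/unsigned bias, 3 and 4 the inner-tap
-;rounding terms, 0x003f (63 per s16 lane) the rounding for the >>7 shift,
-;and 9/18/27 the wide-tap weights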
-
- END
diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm
index b9ba1cbc3..3f1a30f48 100644
--- a/vp8/common/arm/neon/recon16x16mb_neon.asm
+++ b/vp8/common/arm/neon/recon16x16mb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm
index 25aaf8c8e..99b251c91 100644
--- a/vp8/common/arm/neon/recon2b_neon.asm
+++ b/vp8/common/arm/neon/recon2b_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm
index a4f5b806b..991727746 100644
--- a/vp8/common/arm/neon/recon4b_neon.asm
+++ b/vp8/common/arm/neon/recon4b_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/recon_neon.c b/vp8/common/arm/neon/recon_neon.c
new file mode 100644
index 000000000..f7930ee5f
--- /dev/null
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "recon.h"
+#include "blockd.h"
+
+extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
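+
+/* The NEON routine declared above is assumed to add the residual in diff_ptr
+ * to the 16x16 luma predictor and both 8x8 chroma predictors, writing the
+ * results to dst_ptr, udst_ptr and vdst_ptr. */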
+
+void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+{
+ unsigned char *pred_ptr = &x->predictor[0];
+ short *diff_ptr = &x->diff[0];
+ unsigned char *dst_ptr = x->dst.y_buffer;
+ unsigned char *udst_ptr = x->dst.u_buffer;
+ unsigned char *vdst_ptr = x->dst.v_buffer;
+ int ystride = x->dst.y_stride;
+ /*int uv_stride = x->dst.uv_stride;*/
+
+ vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
+}
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
index 16d85a0d5..288c0ef01 100644
--- a/vp8/common/arm/neon/reconb_neon.asm
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm
index 4873e447f..fd7002e7a 100644
--- a/vp8/common/arm/neon/save_neon_reg.asm
+++ b/vp8/common/arm/neon/save_neon_reg.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
index 7d06ff908..d7bdbae75 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index ffecfbfbc..d77a2879e 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9f5f0d2ce..e434a709c 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index c23a9dbd1..3d22d775a 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index 18e19f958..1dd6b1b37 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index d27485e6c..37255c758 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
deleted file mode 100644
index 130059e64..000000000
--- a/vp8/common/arm/recon_arm.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "recon.h"
-#include "blockd.h"
-
-extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
-
-/*
-void vp8_recon16x16mby(MACROBLOCKD *x)
-{
- int i;
- for(i=0;i<16;i+=4)
- {
- //vp8_recon4b(&x->block[i]);
- BLOCKD *b = &x->block[i];
- vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- }
-}
-*/
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
- BLOCKD *b = &x->block[0];
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
- //b = &x->block[4];
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
- //b = &x->block[8];
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
- //b = &x->block[12];
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-
-#if HAVE_ARMV7
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
- unsigned char *pred_ptr = &x->predictor[0];
- short *diff_ptr = &x->diff[0];
- unsigned char *dst_ptr = x->dst.y_buffer;
- unsigned char *udst_ptr = x->dst.u_buffer;
- unsigned char *vdst_ptr = x->dst.v_buffer;
- int ystride = x->dst.y_stride;
- //int uv_stride = x->dst.uv_stride;
-
- vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
-}
-
-#else
-/*
-void vp8_recon16x16mb(MACROBLOCKD *x)
-{
- int i;
-
- for(i=0;i<16;i+=4)
- {
-// vp8_recon4b(&x->block[i]);
- BLOCKD *b = &x->block[i];
- vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
- }
- for(i=16;i<24;i+=2)
- {
-// vp8_recon2b(&x->block[i]);
- BLOCKD *b = &x->block[i];
- vp8_recon2b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- }
-}
-*/
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
- BLOCKD *b = &x->block[0];
-
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 4;
- RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 4;
-
- //b = &x->block[16];
-
- RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b++;
- b++;
- RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b++;
- b++;
- RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b++;
- b++;
- RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-#endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index fd9f85eea..b46b7fc7d 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
extern prototype_copy_block(vp8_copy_mem8x4_v6);
extern prototype_copy_block(vp8_copy_mem16x16_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_armv6
@@ -38,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_recon_block(vp8_recon_b_neon);
@@ -48,6 +51,9 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
extern prototype_copy_block(vp8_copy_mem8x4_neon);
extern prototype_copy_block(vp8_copy_mem16x16_neon);
+extern prototype_recon_macroblock(vp8_recon_mb_neon);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon
#define vp8_recon_recon vp8_recon_b_neon
@@ -65,6 +71,10 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
+
+#undef vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_neon
+#endif
#endif
#endif
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
deleted file mode 100644
index 334d35236..000000000
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "recon.h"
-#include "vpx_mem/vpx_mem.h"
-#include "reconintra.h"
-
-void vp8_predict_intra4x4(BLOCKD *x,
- int b_mode,
- unsigned char *predictor)
-{
- int i, r, c;
-
- unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride;
- unsigned char Left[4];
- unsigned char top_left = Above[-1];
-
- Left[0] = (*(x->base_dst))[x->dst - 1];
- Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
- Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
- Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
-
- switch (b_mode)
- {
- case B_DC_PRED:
- {
- int expected_dc = 0;
-
- for (i = 0; i < 4; i++)
- {
- expected_dc += Above[i];
- expected_dc += Left[i];
- }
-
- expected_dc = (expected_dc + 4) >> 3;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- predictor[c] = expected_dc;
- }
-
- predictor += 16;
- }
- }
- break;
- case B_TM_PRED:
- {
- // prediction similar to true_motion prediction
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- int pred = Above[c] - top_left + Left[r];
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- predictor[c] = pred;
- }
-
- predictor += 16;
- }
- }
- break;
-
- case B_VE_PRED:
- {
-
- unsigned int ap[4];
- ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
- ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
- ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
- ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
-
- predictor[c] = ap[c];
- }
-
- predictor += 16;
- }
-
- }
- break;
-
-
- case B_HE_PRED:
- {
-
- unsigned int lp[4];
- lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
- lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
- lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
- lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- predictor[c] = lp[r];
- }
-
- predictor += 16;
- }
- }
- break;
- case B_LD_PRED:
- {
- unsigned char *ptr = Above;
- predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
- predictor[0 * 16 + 1] =
- predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
- predictor[0 * 16 + 2] =
- predictor[1 * 16 + 1] =
- predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
- predictor[0 * 16 + 3] =
- predictor[1 * 16 + 2] =
- predictor[2 * 16 + 1] =
- predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
- predictor[1 * 16 + 3] =
- predictor[2 * 16 + 2] =
- predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
- predictor[2 * 16 + 3] =
- predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
- predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
-
- }
- break;
- case B_RD_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
- predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- predictor[3 * 16 + 1] =
- predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[3 * 16 + 2] =
- predictor[2 * 16 + 1] =
- predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- predictor[3 * 16 + 3] =
- predictor[2 * 16 + 2] =
- predictor[1 * 16 + 1] =
- predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[2 * 16 + 3] =
- predictor[1 * 16 + 2] =
- predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[1 * 16 + 3] =
- predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-
- }
- break;
- case B_VR_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- predictor[3 * 16 + 1] =
- predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[2 * 16 + 1] =
- predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
- predictor[3 * 16 + 2] =
- predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[2 * 16 + 2] =
- predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
- predictor[3 * 16 + 3] =
- predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- predictor[2 * 16 + 3] =
- predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
- predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
- predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
-
- }
- break;
- case B_VL_PRED:
- {
-
- unsigned char *pp = Above;
-
- predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- predictor[2 * 16 + 0] =
- predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
- predictor[1 * 16 + 1] =
- predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[2 * 16 + 1] =
- predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
- predictor[3 * 16 + 1] =
- predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- predictor[0 * 16 + 3] =
- predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
- predictor[1 * 16 + 3] =
- predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
- case B_HD_PRED:
- {
- unsigned char pp[9];
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- predictor[2 * 16 + 0] =
- predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
- predictor[2 * 16 + 1] =
- predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[2 * 16 + 2] =
- predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
- predictor[2 * 16 + 3] =
- predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- predictor[1 * 16 + 2] =
- predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
- predictor[1 * 16 + 3] =
- predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
-
- case B_HU_PRED:
- {
- unsigned char *pp = Left;
- predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
- predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- predictor[0 * 16 + 2] =
- predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
- predictor[0 * 16 + 3] =
- predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- predictor[1 * 16 + 2] =
- predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
- predictor[1 * 16 + 3] =
- predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
- predictor[2 * 16 + 2] =
- predictor[2 * 16 + 3] =
- predictor[3 * 16 + 0] =
- predictor[3 * 16 + 1] =
- predictor[3 * 16 + 2] =
- predictor[3 * 16 + 3] = pp[3];
- }
- break;
-
-
- }
-}
-// copy 4 bytes from above-right down the right edge so that the 4x4 prediction
-// modes that use pixels above and to the right have valid pixels to work with.
-void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
-{
- unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
-
- unsigned int *src_ptr = (unsigned int *)above_right;
- unsigned int *dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
- unsigned int *dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
- unsigned int *dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
-
- *dst_ptr0 = *src_ptr;
- *dst_ptr1 = *src_ptr;
- *dst_ptr2 = *src_ptr;
-}
-
-
-
-/*
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
- int i;
-
- vp8_intra_prediction_down_copy(x);
-
- for(i=0;i<16;i++)
- {
- BLOCKD *b = &x->block[i];
-
- vp8_predict_intra4x4(b, x->block[i].bmi.mode,x->block[i].predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- }
-
- vp8_recon_intra_mbuv(x);
-
-}
-*/
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
- int i;
- BLOCKD *b = &x->block[0];
-
- vp8_intra_prediction_down_copy(x);
-
- {
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- b += 1;
-
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
- }
-
- vp8_recon_intra_mbuv(rtcd, x);
-
-}
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index d7ee1ddfa..4cc93d134 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -28,7 +29,7 @@ void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x)
unsigned char *y_buffer = x->dst.y_buffer;
unsigned char *ypred_ptr = x->predictor;
int y_stride = x->dst.y_stride;
- int mode = x->mbmi.mode;
+ int mode = x->mode_info_context->mbmi.mode;
int Up = x->up_available;
int Left = x->left_available;
@@ -51,7 +52,7 @@ void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x)
unsigned char *y_buffer = x->dst.y_buffer;
unsigned char *ypred_ptr = x->predictor;
int y_stride = x->dst.y_stride;
- int mode = x->mbmi.mode;
+ int mode = x->mode_info_context->mbmi.mode;
int Up = x->up_available;
int Left = x->left_available;
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 56aec55b9..6288538d0 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -21,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
@@ -45,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -56,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
@@ -80,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
#undef vp8_subpix_bilinear4x4
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
#endif
+#endif
#endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
deleted file mode 100644
index ecc6929c0..000000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
-
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_neon;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
- rtcd->recon.recon = vp8_recon_b_neon;
- rtcd->recon.recon2 = vp8_recon2b_neon;
- rtcd->recon.recon4 = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
-
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_armv6;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_armv6;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_armv6;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
- rtcd->recon.recon = vp8_recon_b_armv6;
- rtcd->recon.recon2 = vp8_recon2b_armv6;
- rtcd->recon.recon4 = vp8_recon4b_armv6;
-#else
-//pure c
- rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
- rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
- rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
- rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
-
- rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
- rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
- rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
- rtcd->recon.recon = vp8_recon_b_c;
- rtcd->recon.recon2 = vp8_recon2b_c;
- rtcd->recon.recon4 = vp8_recon4b_c;
-
- rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
- rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
- rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c;
- rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c;
- rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
- rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c;
- rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c;
- rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c;
-
- rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
- rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
- rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
- rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
- rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
- rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
- rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
- rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
-#endif
-
- rtcd->postproc.down = vp8_mbpost_proc_down_c;
- rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
- rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
- rtcd->postproc.addnoise = vp8_plane_add_noise_c;
-#endif
-
-#if HAVE_ARMV7
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
- vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
- vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 68634bf55..5baf8ccf5 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -31,55 +32,50 @@
*/
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
-DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
+DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
+DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
+DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
+DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
+DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
+DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
+DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
+DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
+DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
#endif
#if CONFIG_VP8_DECODER
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
-DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
-DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
-DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
-DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
-DEFINE(mb_mbmi_mode, offsetof(MACROBLOCKD, mbmi.mode));
-DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
-DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
+DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
+DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
+DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
+DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
+DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
+DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
DEFINE(detok_scan, offsetof(DETOK, scan));
-DEFINE(detok_ptr_onyxblock2context_leftabove, offsetof(DETOK, ptr_onyxblock2context_leftabove));
-DEFINE(detok_onyx_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
-DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
-DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
-DEFINE(detok_ptr_onyx_coef_bands_x, offsetof(DETOK, ptr_onyx_coef_bands_x));
+DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
+DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
+DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
+DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
+DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
-DEFINE(DETOK_A, offsetof(DETOK, A));
-DEFINE(DETOK_L, offsetof(DETOK, L));
+DEFINE(detok_A, offsetof(DETOK, A));
+DEFINE(detok_L, offsetof(DETOK, L));
-DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
-DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
-DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
+DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
+DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
+DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
DEFINE(detok_eob, offsetof(DETOK, eob));
-DEFINE(bool_decoder_lowvalue, offsetof(BOOL_DECODER, lowvalue));
-DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
+DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
+DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
-DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
-DEFINE(bool_decoder_user_buffer_sz, offsetof(BOOL_DECODER, user_buffer_sz));
-DEFINE(bool_decoder_decode_buffer, offsetof(BOOL_DECODER, decode_buffer));
-DEFINE(bool_decoder_read_ptr, offsetof(BOOL_DECODER, read_ptr));
-DEFINE(bool_decoder_write_ptr, offsetof(BOOL_DECODER, write_ptr));
+DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
-DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
+DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
#endif
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
index 6a91ba1ae..6ac3f8b5a 100644
--- a/vp8/common/bigend.h
+++ b/vp8/common/bigend.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index 53f5e72d2..7f75a72c5 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -1,23 +1,24 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
-void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
-{
- vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
- vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
-}
-
-const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
-const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
-const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
+
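+/* The tables below map a block index (0-24) to an ENTROPY_CONTEXT index; the
+   values appear to follow the ENTROPY_CONTEXT_PLANES layout introduced by
+   this change: 0-3 = y1, 4-5 = u, 6-7 = v, 8 = y2. */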
+const unsigned char vp8_block2left[25] =
+{
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+const unsigned char vp8_block2above[25] =
+{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 84ed53ad2..a38f0b72b 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,7 +24,7 @@ void vpx_log(const char *format, ...);
#define TRUE 1
#define FALSE 0
-//#define DCPRED 1
+/*#define DCPRED 1*/
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
@@ -38,7 +39,7 @@ void vpx_log(const char *format, ...);
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 4
-// Segment Feature Masks
+/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
@@ -48,19 +49,19 @@ typedef struct
} POS;
-typedef int ENTROPY_CONTEXT;
-
+typedef char ENTROPY_CONTEXT;
typedef struct
{
- ENTROPY_CONTEXT l[4];
- ENTROPY_CONTEXT a[4];
-} TEMP_CONTEXT;
+ ENTROPY_CONTEXT y1[4];
+ ENTROPY_CONTEXT u[2];
+ ENTROPY_CONTEXT v[2];
+ ENTROPY_CONTEXT y2;
+} ENTROPY_CONTEXT_PLANES;
-extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
-extern const int vp8_block2left[25];
-extern const int vp8_block2above[25];
extern const int vp8_block2type[25];
-extern const int vp8_block2context[25];
+
+extern const unsigned char vp8_block2left[25];
+extern const unsigned char vp8_block2above[25];
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = ((A)!=0) + ((B)!=0);
@@ -74,11 +75,11 @@ typedef enum
typedef enum
{
- DC_PRED, // average of above and left pixels
- V_PRED, // vertical prediction
- H_PRED, // horizontal prediction
- TM_PRED, // Truemotion prediction
- B_PRED, // block based prediction, each block has its own prediction mode
+ DC_PRED, /* average of above and left pixels */
+ V_PRED, /* vertical prediction */
+ H_PRED, /* horizontal prediction */
+ TM_PRED, /* Truemotion prediction */
+ B_PRED, /* block based prediction, each block has its own prediction mode */
NEARESTMV,
NEARMV,
@@ -89,16 +90,16 @@ typedef enum
MB_MODE_COUNT
} MB_PREDICTION_MODE;
-// Macroblock level features
+/* Macroblock level features */
typedef enum
{
- MB_LVL_ALT_Q = 0, // Use alternate Quantizer ....
- MB_LVL_ALT_LF = 1, // Use alternate loop filter value...
- MB_LVL_MAX = 2, // Number of MB level features supported
+ MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
+ MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
+ MB_LVL_MAX = 2 /* Number of MB level features supported */
} MB_LVL_FEATURES;
-// Segment Feature Masks
+/* Segment Feature Masks */
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
@@ -109,11 +110,11 @@ typedef enum
typedef enum
{
- B_DC_PRED, // average of above and left pixels
+ B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
- B_VE_PRED, // vertical prediction
- B_HE_PRED, // horizontal prediction
+ B_VE_PRED, /* vertical prediction */
+ B_HE_PRED, /* horizontal prediction */
B_LD_PRED,
B_RD_PRED,
@@ -167,15 +168,15 @@ typedef struct
int as_int;
MV as_mv;
} mv;
- int partitioning;
- int partition_count;
- int mb_skip_coeff; //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
- int dc_diff;
- unsigned char segment_id; // Which set of segmentation parameters should be used for this MB
- int force_no_skip;
- B_MODE_INFO partition_bmi[16];
+ unsigned char partitioning;
+ unsigned char mb_skip_coeff; /* does this mb have coefficients at all; 1=no coefficients, 0=need to decode tokens */
+ unsigned char dc_diff;
+ unsigned char need_to_clamp_mvs;
+ unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
+
+ unsigned char force_no_skip; /* encoder only */
} MB_MODE_INFO;
@@ -194,9 +195,9 @@ typedef struct
short *diff;
short *reference;
- short(*dequant)[4];
+ short *dequant;
- // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+ /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
unsigned char **base_pre;
int pre;
int pre_stride;
@@ -213,71 +214,65 @@ typedef struct
typedef struct
{
- DECLARE_ALIGNED(16, short, diff[400]); // from idct diff
+ DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
- DECLARE_ALIGNED(16, short, reference[384]);
+/* not used DECLARE_ALIGNED(16, short, reference[384]); */
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
+ DECLARE_ALIGNED(16, char, eobs[25]);
- // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
+ /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
- YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
+ YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG dst;
MODE_INFO *mode_info_context;
- MODE_INFO *mode_info;
-
int mode_info_stride;
FRAME_TYPE frame_type;
- MB_MODE_INFO mbmi;
-
int up_available;
int left_available;
- // Y,U,V,Y2
- ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
- ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts ""
+ /* Y,U,V,Y2 */
+ ENTROPY_CONTEXT_PLANES *above_context;
+ ENTROPY_CONTEXT_PLANES *left_context;
- // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
+ /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
unsigned char segmentation_enabled;
- // 0 (do not update) 1 (update) the macroblock segmentation map.
+ /* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
- // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+ /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
- // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+ /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
- // Per frame flags that define which MB level features (such as quantizer or loop filter level)
- // are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
- vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; // Probability Tree used to code Segment number
+ /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+ /* are enabled and, when enabled, the probabilities used to decode the per MB flags in MB_MODE_INFO */
+ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
- signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
+ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
- // mode_based Loop filter adjustment
+ /* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
- // Delta values have the range +/- MAX_LOOP_FILTER
- //char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
- //char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
- signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
- signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
+ /* Delta values have the range +/- MAX_LOOP_FILTER */
+ signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
+ signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
+ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+ signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
- // Distance of MB away from frame edges
+ /* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
- //char * gf_active_ptr;
- signed char *gf_active_ptr;
-
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
vp8_subpix_fn_t subpixel_predict;
diff --git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h
index 0659d4873..5658868a6 100644
--- a/vp8/common/boolcoder.h
+++ b/vp8/common/boolcoder.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h
index 7881b0a41..7a7db3847 100644
--- a/vp8/common/codec_common_interface.h
+++ b/vp8/common/codec_common_interface.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef CODEC_COMMON_INTERFACE_H
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 99affd618..785e3ff70 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 29f6d371b..9a93da991 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
index deb5ed8e5..4e6248697 100644
--- a/vp8/common/common_types.h
+++ b/vp8/common/common_types.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/context.c b/vp8/common/context.c
index 17ee8c338..99e95d30f 100644
--- a/vp8/common/context.c
+++ b/vp8/common/context.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index e2d2d2c0f..8c03480fa 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
- // print out the macroblock Y modes
+ /* print out the macroblock Y modes */
mb_index = 0;
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
@@ -59,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
- // print out the macroblock UV modes
+ /* print out the macroblock UV modes */
mb_index = 0;
fprintf(mvs, "UV Modes for Frame %d\n", frame);
@@ -79,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
- // print out the block modes
+ /* print out the block modes */
mb_index = 0;
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
@@ -107,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
}
fprintf(mvs, "\n");
- // print out the macroblock mvs
+ /* print out the macroblock mvs */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
@@ -127,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
fprintf(mvs, "\n");
- // print out the block modes
+ /* print out the block modes */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
{
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index ccdf326e6..ca58d565a 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
{
{
- // Block Type ( 0 )
+ /* Block Type ( 0 ) */
{
- // Coeff Band ( 0 )
+ /* Coeff Band ( 0 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
{
- // Coeff Band ( 1 )
+ /* Coeff Band ( 1 ) */
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
},
{
- // Coeff Band ( 2 )
+ /* Coeff Band ( 2 ) */
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
},
{
- // Coeff Band ( 3 )
+ /* Coeff Band ( 3 ) */
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
},
{
- // Coeff Band ( 4 )
+ /* Coeff Band ( 4 ) */
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
},
{
- // Coeff Band ( 5 )
+ /* Coeff Band ( 5 ) */
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
},
{
- // Coeff Band ( 6 )
+ /* Coeff Band ( 6 ) */
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
},
{
- // Coeff Band ( 7 )
+ /* Coeff Band ( 7 ) */
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
},
},
{
- // Block Type ( 1 )
+ /* Block Type ( 1 ) */
{
- // Coeff Band ( 0 )
+ /* Coeff Band ( 0 ) */
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
},
{
- // Coeff Band ( 1 )
+ /* Coeff Band ( 1 ) */
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
},
{
- // Coeff Band ( 2 )
+ /* Coeff Band ( 2 ) */
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
},
{
- // Coeff Band ( 3 )
+ /* Coeff Band ( 3 ) */
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
},
{
- // Coeff Band ( 4 )
+ /* Coeff Band ( 4 ) */
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
},
{
- // Coeff Band ( 5 )
+ /* Coeff Band ( 5 ) */
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
},
{
- // Coeff Band ( 6 )
+ /* Coeff Band ( 6 ) */
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
},
{
- // Coeff Band ( 7 )
+ /* Coeff Band ( 7 ) */
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
},
},
{
- // Block Type ( 2 )
+ /* Block Type ( 2 ) */
{
- // Coeff Band ( 0 )
+ /* Coeff Band ( 0 ) */
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
},
{
- // Coeff Band ( 1 )
+ /* Coeff Band ( 1 ) */
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
},
{
- // Coeff Band ( 2 )
+ /* Coeff Band ( 2 ) */
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
},
{
- // Coeff Band ( 3 )
+ /* Coeff Band ( 3 ) */
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
},
{
- // Coeff Band ( 4 )
+ /* Coeff Band ( 4 ) */
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
},
{
- // Coeff Band ( 5 )
+ /* Coeff Band ( 5 ) */
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
},
{
- // Coeff Band ( 6 )
+ /* Coeff Band ( 6 ) */
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
},
{
- // Coeff Band ( 7 )
+ /* Coeff Band ( 7 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
},
{
- // Block Type ( 3 )
+ /* Block Type ( 3 ) */
{
- // Coeff Band ( 0 )
+ /* Coeff Band ( 0 ) */
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
},
{
- // Coeff Band ( 1 )
+ /* Coeff Band ( 1 ) */
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
},
{
- // Coeff Band ( 2 )
+ /* Coeff Band ( 2 ) */
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
},
{
- // Coeff Band ( 3 )
+ /* Coeff Band ( 3 ) */
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
},
{
- // Coeff Band ( 4 )
+ /* Coeff Band ( 4 ) */
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
},
{
- // Coeff Band ( 5 )
+ /* Coeff Band ( 5 ) */
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
},
{
- // Coeff Band ( 6 )
+ /* Coeff Band ( 6 ) */
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
},
{
- // Coeff Band ( 7 )
+ /* Coeff Band ( 7 ) */
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
index 5e6fa0ca9..b923da6e0 100644
--- a/vp8/common/dma_desc.h
+++ b/vp8/common/dma_desc.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
index f63a5cdc1..43daa65bc 100644
--- a/vp8/common/duck_io.h
+++ b/vp8/common/duck_io.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index e524c2acc..1438e7e0f 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 1415832d5..0685cd0ae 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -16,18 +17,18 @@
/* Coefficient token alphabet */
-#define ZERO_TOKEN 0 //0 Extra Bits 0+0
-#define ONE_TOKEN 1 //1 Extra Bits 0+1
-#define TWO_TOKEN 2 //2 Extra Bits 0+1
-#define THREE_TOKEN 3 //3 Extra Bits 0+1
-#define FOUR_TOKEN 4 //4 Extra Bits 0+1
-#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1
-#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1
-#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1
-#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1
-#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1
-#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1
-#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0
+#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
+#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
+#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
+#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
+#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
+#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define vp8_coef_tokens 12
#define MAX_ENTROPY_TOKENS vp8_coef_tokens
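Reading the table: each token covers a range of absolute coefficient magnitudes, and "Extra Bits m+1" means m magnitude bits plus one sign bit. A hedged sketch of recovering a value from a category token (read_bit() is a hypothetical bitstream reader; the base value follows from the range in the comment above):

    int base = 5;              /* DCT_VAL_CATEGORY1 spans 5-6       */
    int extra = read_bit();    /* the 1 magnitude bit               */
    int sign = read_bit();     /* the "+1" sign bit                 */
    int value = base + extra;  /* 5 or 6                            */
    if (sign)
        value = -value;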
@@ -82,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
coefficient band (and since zigzag positions 0, 1, and 2 are in
distinct bands). */
-/*# define DC_TOKEN_CONTEXTS 3 // 00, 0!0, !0!0 */
+/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 7dc1acde0..e9dc668b2 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -263,8 +264,10 @@ void vp8_entropy_mode_init()
vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree);
vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
- vp8_tokens_from_tree(VP8_MVREFENCODINGS, vp8_mv_ref_tree);
- vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree);
+ vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
+ vp8_mv_ref_tree, NEARESTMV);
+ vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array,
+ vp8_sub_mv_ref_tree, LEFT4X4);
vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
}
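The removed VP8_MVREFENCODINGS/VP8_SUBMVREFENCODINGS macros biased the array pointers by -NEARESTMV and -LEFT4X4 so callers could index directly by mode value; forming a pointer before the start of an array is undefined behaviour in C. vp8_tokens_from_tree_offset instead takes the offset as an argument, so lookups stay in bounds. Conceptually (a sketch, not the library code):

    /* old, UB-prone:   encodings = vp8_mv_ref_encoding_array - NEARESTMV;  encodings[mode] */
    /* new, well-defined: index relative to the first inter mode */
    struct vp8_token_struct *t = &vp8_mv_ref_encoding_array[mode - NEARESTMV];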
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index ff630a477..da6ae8ead 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -53,10 +54,6 @@ extern struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
extern struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS];
extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
-#define VP8_MVREFENCODINGS (vp8_mv_ref_encoding_array - NEARESTMV)
-#define VP8_SUBMVREFENCODINGS (vp8_sub_mv_ref_encoding_array - LEFT4X4)
-
-
extern const vp8_tree_index vp8_small_mvtree[];
extern struct vp8_token_struct vp8_small_mvencodings [8];
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 2b00c17a9..e5df1f095 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -28,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
- // row
- 162, // is short
- 128, // sign
- 225, 146, 172, 147, 214, 39, 156, // short tree
- 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 // long bits
+ /* row */
+ 162, /* is short */
+ 128, /* sign */
+ 225, 146, 172, 147, 214, 39, 156, /* short tree */
+ 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}},
{{
- // same for column
- 164, // is short
+ /* same for column */
+ 164, /* is short */
128,
204, 170, 119, 235, 140, 230, 228,
- 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 // long bits
+ 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}}
};
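The 19 probabilities per component line up with the comments: one "is short" flag, one sign, seven nodes for the short-vector tree, and ten independent bits for the long-vector magnitude. A sketch of the implied offsets (inferred from the comment layout here, not from entropymv.h):

    /* assumed offsets into each component's 19-entry probability array */
    enum { MV_IS_SHORT = 0, MV_SIGN = 1, MV_SHORT_TREE = 2, MV_LONG_BITS = 9 };
    /* e.g. the row component's sign probability above is 128 */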
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index d940c599b..911507ddc 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 74079527c..47207fa79 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,14 +15,14 @@
static void extend_plane_borders
(
- unsigned char *s, // source
- int sp, // pitch
- int h, // height
- int w, // width
- int et, // extend top border
- int el, // extend left border
- int eb, // extend bottom border
- int er // extend right border
+ unsigned char *s, /* source */
+ int sp, /* pitch */
+ int h, /* height */
+ int w, /* width */
+ int et, /* extend top border */
+ int el, /* extend left border */
+ int eb, /* extend bottom border */
+ int er /* extend right border */
)
{
@@ -30,7 +31,7 @@ static void extend_plane_borders
unsigned char *dest_ptr1, *dest_ptr2;
int linesize;
- // copy the left and right most columns out
+ /* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = s - el;
@@ -38,7 +39,11 @@ static void extend_plane_borders
for (i = 0; i < h - 0 + 1; i++)
{
- vpx_memset(dest_ptr1, src_ptr1[0], el);
+ /* Some linkers will complain if we call vpx_memset with el set to a
+ * constant 0.
+ */
+ if (el)
+ vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
@@ -46,7 +51,7 @@ static void extend_plane_borders
dest_ptr2 += sp;
}
- // Now copy the top and bottom source lines into each line of the respective borders
+ /* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = s - el;
src_ptr2 = s + sp * (h - 1) - el;
dest_ptr1 = s + sp * (-et) - el;
@@ -72,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
int er = 0xf & (16 - (width & 0xf));
int eb = 0xf & (16 - (height & 0xf));
- // check for non multiples of 16
+ /* check for non multiples of 16 */
if (er != 0 || eb != 0)
{
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
- //adjust for uv
+ /* adjust for uv */
height = (height + 1) >> 1;
width = (width + 1) >> 1;
er = 0x7 & (8 - (width & 0x7));
@@ -91,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
}
}
-// note the extension is only for the last row, for intra prediction purpose
+/* note the extension is only for the last row, for intra prediction purposes */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
{
int i;
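The masks in vp8_extend_to_multiple_of16 compute how much right/bottom padding brings the frame up to a multiple of 16 (8 for the half-resolution chroma planes). Worked through for two widths:

    /* width = 180: 180 & 0xf = 4,  0xf & (16 - 4) = 12  -> extend right by 12 */
    /* width = 176: 176 & 0xf = 0,  0xf & (16 - 0) = 0   -> already aligned    */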
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index 6809ae756..fd0a608e5 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 38991cb28..399a847d5 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -31,13 +32,13 @@ static const int bilinear_filters[8][2] =
static const short sub_pel_filters[8][6] =
{
- { 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+ { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
- { 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter
+ { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
- { 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter
+ { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
- { 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter
+ { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
@@ -68,9 +69,9 @@ void vp8_filter_block2d_first_pass
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
- (VP8_FILTER_WEIGHT >> 1); // Rounding
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
- // Normalize back to 0-255
+ /* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
@@ -82,7 +83,7 @@ void vp8_filter_block2d_first_pass
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@@ -107,16 +108,16 @@ void vp8_filter_block2d_second_pass
{
for (j = 0; j < output_width; j++)
{
- // Apply filter
+ /* Apply filter */
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
- (VP8_FILTER_WEIGHT >> 1); // Rounding
+ (VP8_FILTER_WEIGHT >> 1); /* Rounding */
- // Normalize back to 0-255
+ /* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
@@ -128,7 +129,7 @@ void vp8_filter_block2d_second_pass
src_ptr++;
}
- // Start next row
+ /* Start next row */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
@@ -145,12 +146,12 @@ void vp8_filter_block2d
const short *VFilter
)
{
- int FData[9*4]; // Temp data bufffer used in filtering
+ int FData[9*4]; /* Temp data buffer used in filtering */
- // First filter 1-D horizontally...
+ /* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
- // then filter verticaly...
+ /* then filter vertically... */
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
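The buffer sizes follow from the 6-tap geometry: the vertical filter reads taps at -2..+3 around each output row, so filtering a 4x4 block needs 9 horizontally filtered rows, and the second pass starts at FData + 8 to skip the two lead-in rows of width 4. As a quick check (constants taken straight from the calls above):

    int out_rows = 4;
    int first_pass_rows = out_rows + 5;  /* = 9, matches FData[9*4]         */
    int second_pass_skip = 2 * 4;        /* 2 rows of width 4 -> FData + 8  */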
@@ -194,8 +195,8 @@ void vp8_sixtap_predict_c
const short *HFilter;
const short *VFilter;
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
}
@@ -211,16 +212,16 @@ void vp8_sixtap_predict8x8_c
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; // Temp data bufffer used in filtering
+ int FData[13*16]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
- // First filter 1-D horizontally...
+ /* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
- // then filter verticaly...
+ /* then filter vertically... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
@@ -237,16 +238,16 @@ void vp8_sixtap_predict8x4_c
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; // Temp data bufffer used in filtering
+ int FData[13*16]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
- // First filter 1-D horizontally...
+ /* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
- // then filter verticaly...
+ /* then filter vertically... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
@@ -263,16 +264,16 @@ void vp8_sixtap_predict16x16_c
{
const short *HFilter;
const short *VFilter;
- int FData[21*24]; // Temp data bufffer used in filtering
+ int FData[21*24]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; // 6 tap
- VFilter = sub_pel_filters[yoffset]; // 6 tap
+ HFilter = sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = sub_pel_filters[yoffset]; /* 6 tap */
- // First filter 1-D horizontally...
+ /* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
- // then filter verticaly...
+ /* then filter vertically... */
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
@@ -323,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
{
for (j = 0; j < output_width; j++)
{
- // Apply bilinear filter
+ /* Apply bilinear filter */
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
@@ -383,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
{
for (j = 0; j < output_width; j++)
{
- // Apply filter
+ /* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
@@ -391,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
src_ptr++;
}
- // Next row...
+ /* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
@@ -431,12 +432,12 @@ void vp8_filter_block2d_bil
)
{
- unsigned short FData[17*16]; // Temp data bufffer used in filtering
+ unsigned short FData[17*16]; /* Temp data buffer used in filtering */
- // First filter 1-D horizontally...
+ /* First filter 1-D horizontally... */
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
- // then 1-D vertically...
+ /* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
}
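Each bilinear tap pair sums to VP8_FILTER_WEIGHT, so the passes compute a weighted average with round-to-nearest. Assuming the usual VP8_FILTER_WEIGHT == 128 and VP8_FILTER_SHIFT == 7 (the definitions are not shown in this hunk), the half-pel case works out as:

    int a = 100, b = 104;                   /* two neighbouring samples  */
    int f0 = 64, f1 = 64;                   /* half-pel bilinear filter  */
    int out = (a * f0 + b * f1 + 64) >> 7;  /* = 102                     */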
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index fcb1f202c..e63d4ef8d 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -167,7 +168,7 @@ void vp8_find_near_mvs
vp8_clamp_mv(nearest, xd);
vp8_clamp_mv(nearby, xd);
- vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
+ vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
}
vp8_prob *vp8_mv_ref_probs(
@@ -178,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
- //p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
+ /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
return p;
}
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 2c02033e6..1a6c72bcd 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp
index 5f1faed2f..c5826285e 100644
--- a/vp8/common/fourcc.hpp
+++ b/vp8/common/fourcc.hpp
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
index e68c53e1c..5f523980b 100644
--- a/vp8/common/g_common.h
+++ b/vp8/common/g_common.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0011ae0dc..b3eadaf27 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -17,6 +18,7 @@
#include "onyxc_int.h"
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -31,16 +33,18 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
+ rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
- rtcd->recon.recon = vp8_recon_b_c;
+ rtcd->recon.recon = vp8_recon_b_c;
rtcd->recon.recon2 = vp8_recon2b_c;
- rtcd->recon.recon4 = vp8_recon4b_c;
+ rtcd->recon.recon4 = vp8_recon4b_c;
+ rtcd->recon.recon_mb = vp8_recon_mb_c;
+ rtcd->recon.recon_mby = vp8_recon_mby_c;
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
@@ -60,15 +64,18 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
-#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER
- rtcd->postproc.down = vp8_mbpost_proc_down_c;
- rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
- rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
- rtcd->postproc.addnoise = vp8_plane_add_noise_c;
+#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
+ rtcd->postproc.down = vp8_mbpost_proc_down_c;
+ rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
+ rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
+ rtcd->postproc.addnoise = vp8_plane_add_noise_c;
+ rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
+ rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
+ rtcd->postproc.blend_b = vp8_blend_b_c;
#endif
#endif
- // Pure C:
+ /* Pure C: */
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
@@ -76,4 +83,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_x86_common_init(ctx);
#endif
+#if ARCH_ARM
+ vp8_arch_arm_common_init(ctx);
+#endif
+
}
diff --git a/vp8/common/header.h b/vp8/common/header.h
index 8b2b0094a..3e98eeb3c 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 47b5f0576..f5fd94dfd 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -17,8 +18,10 @@
#define prototype_idct(sym) \
void sym(short *input, short *output, int pitch)
-#define prototype_idct_scalar(sym) \
- void sym(short input, short *output, int pitch)
+#define prototype_idct_scalar_add(sym) \
+ void sym(short input, \
+ unsigned char *pred, unsigned char *output, \
+ int pitch, int stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/idct_x86.h"
@@ -38,10 +41,10 @@ extern prototype_idct(vp8_idct_idct1);
#endif
extern prototype_idct(vp8_idct_idct16);
-#ifndef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_c
+#ifndef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
#endif
-extern prototype_idct_scalar(vp8_idct_idct1_scalar);
+extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add);
#ifndef vp8_idct_iwalsh1
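Expanding the new macro for the default symbol makes the signature change concrete: the DC-only IDCT now takes the prediction block and writes the reconstructed pixels itself.

    /* prototype_idct_scalar_add(vp8_idct_idct1_scalar_add), with the
       default binding above, declares: */
    void vp8_dc_only_idct_add_c(short input,
                                unsigned char *pred, unsigned char *output,
                                int pitch, int stride);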
@@ -55,14 +58,14 @@ extern prototype_second_order(vp8_idct_iwalsh1);
extern prototype_second_order(vp8_idct_iwalsh16);
typedef prototype_idct((*vp8_idct_fn_t));
-typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t));
+typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
typedef prototype_second_order((*vp8_second_order_fn_t));
typedef struct
{
- vp8_idct_fn_t idct1;
- vp8_idct_fn_t idct16;
- vp8_idct_scalar_fn_t idct1_scalar;
+ vp8_idct_fn_t idct1;
+ vp8_idct_fn_t idct16;
+ vp8_idct_scalar_add_fn_t idct1_scalar_add;
vp8_second_order_fn_t iwalsh1;
vp8_second_order_fn_t iwalsh16;
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 57cf8584e..196062df6 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -103,23 +104,30 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
}
}
-
-void vp8_dc_only_idct_c(short input_dc, short *output, int pitch)
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
- int i;
- int a1;
- short *op = output;
- int shortpitch = pitch >> 1;
- a1 = ((input_dc + 4) >> 3);
+ int a1 = ((input_dc + 4) >> 3);
+ int r, c;
- for (i = 0; i < 4; i++)
+ for (r = 0; r < 4; r++)
{
- op[0] = a1;
- op[1] = a1;
- op[2] = a1;
- op[3] = a1;
- op += shortpitch;
+ for (c = 0; c < 4; c++)
+ {
+ int a = a1 + pred_ptr[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dst_ptr[c] = (unsigned char) a;
+ }
+
+ dst_ptr += stride;
+ pred_ptr += pitch;
}
+
}
void vp8_short_inv_walsh4x4_c(short *input, short *output)
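In the rewritten vp8_dc_only_idct_add_c above, a1 is the rounded DC term and each output pixel is the clamped sum of that term and the prediction. Worked through for one sample:

    /* input_dc = 25:  a1 = (25 + 4) >> 3 = 3                        */
    /* pred_ptr[c] = 250:  a = 253, inside [0, 255]  ->  dst = 253   */
    /* pred_ptr[c] = 254:  a = 257, clamped          ->  dst = 255   */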
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 1ff596ead..81a3f2d89 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -37,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
{
int i;
- // do 2nd order transform on the dc block
+ /* do 2nd order transform on the dc block */
IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);
recon_dcblock(x);
@@ -64,9 +65,10 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
{
int i;
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != B_PRED &&
+ x->mode_info_context->mbmi.mode != SPLITMV)
{
- // do 2nd order transform on the dc block
+ /* do 2nd order transform on the dc block */
IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
recon_dcblock(x);
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 93a40f956..b3ffb7073 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
index 08c525c5d..99df1164c 100644
--- a/vp8/common/littlend.h
+++ b/vp8/common/littlend.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 79e617754..f9d082304 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -22,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
-// Horizontal MB filtering
+/* Horizontal MB filtering */
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -46,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Vertical MB Filtering
+/* Vertical MB Filtering */
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -70,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
-// Horizontal B Filtering
+/* Horizontal B Filtering */
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -98,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
-// Vertical B Filtering
+/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -139,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
const int yhedge_boost = 2;
const int uvhedge_boost = 2;
- // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+ /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
{
int filt_lvl = i;
@@ -165,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
HEVThresh = 0;
}
- // Set loop filter paramaeters that control sharpness.
+ /* Set loop filter parameters that control sharpness. */
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
@@ -194,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
}
- // Set up the function pointers depending on the type of loop filtering selected
+ /* Set up the function pointers depending on the type of loop filtering selected */
if (lft == NORMAL_LOOPFILTER)
{
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
@@ -211,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
}
}
-// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
-// each frame. Check last_frame_type to skip the function most of times.
+/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
+ * each frame. Check last_frame_type to skip the function most of the time.
+ */
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
{
int HEVThresh;
int i, j;
- // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+ /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
{
int filt_lvl = i;
@@ -246,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
for (j = 0; j < 16; j++)
{
- //lfi[i].lim[j] = block_inside_limit;
- //lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
+ /*lfi[i].lim[j] = block_inside_limit;
+ lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
lfi[i].mbthr[j] = HEVThresh;
- //lfi[i].flim[j] = filt_lvl;
+ /*lfi[i].flim[j] = filt_lvl;*/
lfi[i].thr[j] = HEVThresh;
- //lfi[i].uvlim[j] = block_inside_limit;
- //lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
+ /*lfi[i].uvlim[j] = block_inside_limit;
+ lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
lfi[i].uvmbthr[j] = HEVThresh;
- //lfi[i].uvflim[j] = filt_lvl;
+ /*lfi[i].uvflim[j] = filt_lvl;*/
lfi[i].uvthr[j] = HEVThresh;
}
}
@@ -267,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
if (mbd->mode_ref_lf_delta_enabled)
{
- // Aplly delta for reference frame
+ /* Apply delta for reference frame */
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
- // Apply delta for mode
+ /* Apply delta for mode */
if (mbmi->ref_frame == INTRA_FRAME)
{
- // Only the split mode BPRED has a further special case
+ /* Only the split mode BPRED has a further special case */
if (mbmi->mode == B_PRED)
*filter_level += mbd->mode_lf_deltas[0];
}
else
{
- // Zero motion mode
+ /* Zero motion mode */
if (mbmi->mode == ZEROMV)
*filter_level += mbd->mode_lf_deltas[1];
- // Split MB motion mode
+ /* Split MB motion mode */
else if (mbmi->mode == SPLITMV)
*filter_level += mbd->mode_lf_deltas[3];
- // All other inter motion modes (Nearest, Near, New)
+ /* All other inter motion modes (Nearest, Near, New) */
else
*filter_level += mbd->mode_lf_deltas[2];
}
- // Range check
+ /* Range check */
if (*filter_level > MAX_LOOP_FILTER)
*filter_level = MAX_LOOP_FILTER;
else if (*filter_level < 0)
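
The branch ladder above maps (reference frame, prediction mode) to one of the four mode_lf_deltas slots before the range check. A compact restatement of that mapping, with illustrative enum values (SK_-prefixed) standing in for the real libvpx ones:

    #include <stdio.h>

    enum { SK_INTRA_FRAME = 0 };
    enum { SK_B_PRED = 0, SK_ZEROMV, SK_SPLITMV };

    /* returns the mode_lf_deltas[] slot, or -1 when no mode delta applies:
     * 0 = B_PRED, 1 = zero-mv, 2 = other inter modes, 3 = split-mv */
    static int mode_delta_index(int ref_frame, int mode)
    {
        if (ref_frame == SK_INTRA_FRAME)
            return (mode == SK_B_PRED) ? 0 : -1;
        if (mode == SK_ZEROMV)
            return 1;
        if (mode == SK_SPLITMV)
            return 3;
        return 2;                         /* Nearest, Near, New */
    }

    int main(void)
    {
        printf("ZEROMV delta slot: %d\n", mode_delta_index(1, SK_ZEROMV));
        return 0;
    }
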
@@ -310,7 +312,7 @@ void vp8_loop_filter_frame
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
loop_filter_info *lfi = cm->lf_info;
- int frame_type = cm->frame_type;
+ FRAME_TYPE frame_type = cm->frame_type;
int mb_row;
int mb_col;
@@ -323,21 +325,21 @@ void vp8_loop_filter_frame
int i;
unsigned char *y_ptr, *u_ptr, *v_ptr;
- mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
+ mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
- // Note the baseline filter values for each segment
+ /* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
- // Abs value
+ /* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- // Delta Value
+ /* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
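
The one-line ternary above computes the per-segment baseline filter level and clamps it to [0, MAX_LOOP_FILTER]. The same logic unrolled for readability; the names and the value 63 for the maximum level are assumptions of this sketch:

    #include <stdio.h>

    #define SK_MAX_LOOP_FILTER 63   /* VP8's maximum loop filter level */

    static int baseline_level(int abs_data, int feature_data,
                              int default_filt_lvl)
    {
        int level;
        if (abs_data)                /* SEGMENT_ABSDATA: value is absolute */
            level = feature_data;
        else                         /* otherwise treat it as a delta */
            level = default_filt_lvl + feature_data;

        if (level < 0)               /* clamp to the valid range */
            level = 0;
        else if (level > SK_MAX_LOOP_FILTER)
            level = SK_MAX_LOOP_FILTER;
        return level;
    }

    int main(void)
    {
        /* delta mode: default 40 plus feature delta +50 clamps to 63 */
        printf("%d\n", baseline_level(0, 50, 40));
        return 0;
    }
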
@@ -347,18 +349,18 @@ void vp8_loop_filter_frame
baseline_filter_level[i] = default_filt_lvl;
}
- // Initialize the loop filter for this frame.
+ /* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
- // Set up the buffer pointers
+ /* Set up the buffer pointers */
y_ptr = post->y_buffer;
u_ptr = post->u_buffer;
v_ptr = post->v_buffer;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -367,9 +369,10 @@ void vp8_loop_filter_frame
filter_level = baseline_filter_level[Segment];
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
- // Apply any context driven MB level adjustment
+ /* Distance of Mb to the various image edges.
+ * These are specified in 1/8th-pel units, as they are always compared to values in 1/8th-pel units.
+ * Apply any context driven MB level adjustment
+ */
vp8_adjust_mb_lf_value(mbd, &filter_level);
if (filter_level)
@@ -380,7 +383,7 @@ void vp8_loop_filter_frame
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
- // don't apply across umv border
+ /* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
@@ -392,14 +395,14 @@ void vp8_loop_filter_frame
u_ptr += 8;
v_ptr += 8;
- mbd->mode_info_context++; // step to next MB
+ mbd->mode_info_context++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
- mbd->mode_info_context++; // Skip border mb
+ mbd->mode_info_context++; /* Skip border mb */
}
}
@@ -423,26 +426,26 @@ void vp8_loop_filter_frame_yonly
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
- int frame_type = cm->frame_type;
+ FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
- //MODE_INFO * this_mb_mode_info = cm->mi; // Point at base of Mb MODE_INFO list
- mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
+ /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
+ mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
- // Note the baseline filter values for each segment
+ /* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
- // Abs value
+ /* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- // Delta Value
+ /* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
@@ -452,16 +455,16 @@ void vp8_loop_filter_frame_yonly
baseline_filter_level[i] = default_filt_lvl;
}
- // Initialize the loop filter for this frame.
+ /* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
- // Set up the buffer pointers
+ /* Set up the buffer pointers */
y_ptr = post->y_buffer;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -469,7 +472,7 @@ void vp8_loop_filter_frame_yonly
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
filter_level = baseline_filter_level[Segment];
- // Apply any context driven MB level adjustment
+ /* Apply any context driven MB level adjustment */
vp8_adjust_mb_lf_value(mbd, &filter_level);
if (filter_level)
@@ -480,7 +483,7 @@ void vp8_loop_filter_frame_yonly
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
- // don't apply across umv border
+ /* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
@@ -489,12 +492,12 @@ void vp8_loop_filter_frame_yonly
}
y_ptr += 16;
- mbd->mode_info_context ++; // step to next MB
+ mbd->mode_info_context ++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
- mbd->mode_info_context ++; // Skip border mb
+ mbd->mode_info_context ++; /* Skip border mb */
}
}
@@ -515,7 +518,7 @@ void vp8_loop_filter_partial_frame
unsigned char *y_ptr;
int mb_row;
int mb_col;
- //int mb_rows = post->y_height >> 4;
+ /*int mb_rows = post->y_height >> 4;*/
int mb_cols = post->y_width >> 4;
int linestocopy;
@@ -524,12 +527,12 @@ void vp8_loop_filter_partial_frame
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
- int frame_type = cm->frame_type;
+ FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
- //MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
- mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
+ /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
+ mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
linestocopy = (post->y_height >> (4 + Fraction));
@@ -538,19 +541,19 @@ void vp8_loop_filter_partial_frame
linestocopy <<= 4;
- // Note the baseline filter values for each segment
+ /* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
- // Abs value
+ /* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- // Delta Value
+ /* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
@@ -560,16 +563,16 @@ void vp8_loop_filter_partial_frame
baseline_filter_level[i] = default_filt_lvl;
}
- // Initialize the loop filter for this frame.
+ /* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
- // Set up the buffer pointers
+ /* Set up the buffer pointers */
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
@@ -592,10 +595,10 @@ void vp8_loop_filter_partial_frame
}
y_ptr += 16;
- mbd->mode_info_context += 1; // step to next MB
+ mbd->mode_info_context += 1; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
- mbd->mode_info_context += 1; // Skip border mb
+ mbd->mode_info_context += 1; /* Skip border mb */
}
}
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index c6ce508cc..e45683460 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -21,10 +22,10 @@ typedef enum
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
-// FRK
-// Need to align this structure so when it is declared and
-// passed it can be loaded into vector registers.
-// FRK
+/* FRK
+ * Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
typedef struct
{
DECLARE_ALIGNED(16, signed char, lim[16]);
@@ -116,5 +117,14 @@ typedef struct
#define LF_INVOKE(ctx,fn) vp8_lf_##fn
#endif
+typedef void loop_filter_uvfunction
+(
+ unsigned char *u, /* source pointer */
+ int p, /* pitch */
+ const signed char *flimit,
+ const signed char *limit,
+ const signed char *thresh,
+ unsigned char *v
+);
#endif
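
The new loop_filter_uvfunction typedef lets one call filter a U-plane edge and, via the trailing pointer, the matching V-plane edge in the same pass. A sketch of how such a pointer might be declared and dispatched; the stub body is a placeholder for illustration, not the real filter:

    typedef void loop_filter_uvfunction(unsigned char *u, int p,
                                        const signed char *flimit,
                                        const signed char *limit,
                                        const signed char *thresh,
                                        unsigned char *v);

    /* placeholder with the right signature; a real implementation would
     * filter the edge at u and, when v is non-NULL, the edge at v too */
    static void uv_edge_stub(unsigned char *u, int p,
                             const signed char *flimit,
                             const signed char *limit,
                             const signed char *thresh,
                             unsigned char *v)
    {
        (void)u; (void)p; (void)flimit; (void)limit; (void)thresh; (void)v;
    }

    /* dispatch slot, e.g. pointed at a NEON version at init time */
    static loop_filter_uvfunction *lf_uv_horizontal = uv_edge_stub;

    int main(void)
    {
        lf_uv_horizontal(0, 0, 0, 0, 0, 0);
        return 0;
    }
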
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 7d16e4843..694052924 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,12 +13,9 @@
#include "loopfilter.h"
#include "onyxc_int.h"
-
-#define NEW_LOOPFILTER_MASK
-
typedef unsigned char uc;
-__inline signed char vp8_signed_char_clamp(int t)
+static __inline signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
@@ -25,8 +23,8 @@ __inline signed char vp8_signed_char_clamp(int t)
}
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_filter_mask(signed char limit, signed char flimit,
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
@@ -36,17 +34,13 @@ __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
mask |= (abs(q1 - q0) > limit) * -1;
mask |= (abs(q2 - q1) > limit) * -1;
mask |= (abs(q3 - q2) > limit) * -1;
-#ifndef NEW_LOOPFILTER_MASK
- mask |= (abs(p0 - q0) > flimit) * -1;
-#else
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
-#endif
mask = ~mask;
return mask;
}
-// is there high variance internal edge ( 11111111 yes, 00000000 no)
-__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
+static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
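
With NEW_LOOPFILTER_MASK now unconditional, the plain |p0 - q0| > flimit edge test is gone; the surviving test weights the edge step twice and the outer-pixel step by half. A toy evaluation of that condition, with all pixel values invented for illustration (nonzero means the step is too strong and the edge is left unfiltered):

    #include <stdio.h>
    #include <stdlib.h>

    static int edge_too_strong(int limit, int flimit,
                               int p1, int p0, int q0, int q1)
    {
        return abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit;
    }

    int main(void)
    {
        /* gentle gradient: 7 <= 69, so the edge gets filtered */
        printf("%d\n", edge_too_strong(9, 30, 100, 102, 104, 106)); /* 0 */
        /* hard step: 297 > 69, left alone to preserve a real edge */
        printf("%d\n", edge_too_strong(9, 30, 60, 62, 180, 182));   /* 1 */
        return 0;
    }
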
@@ -54,7 +48,7 @@ __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
return hev;
}
-__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char ps0, qs0;
@@ -67,17 +61,18 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc
qs0 = (signed char) * oq0 ^ 0x80;
qs1 = (signed char) * oq1 ^ 0x80;
- // add outer taps if we have high edge variance
+ /* add outer taps if we have high edge variance */
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
vp8_filter &= hev;
- // inner taps
+ /* inner taps */
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
vp8_filter &= mask;
- // save bottom 3 bits so that we round one side +4 and the other +3
- // if it equals 4 we'll set to adjust by -1 to account for the fact
- // we'd round 3 the other way
+ /* save bottom 3 bits so that we round one side +4 and the other +3
+ * if it equals 4 we'll set to adjust by -1 to account for the fact
+ * we'd round 3 the other way
+ */
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
Filter1 >>= 3;
@@ -88,7 +83,7 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc
*op0 = u ^ 0x80;
vp8_filter = Filter1;
- // outer tap adjustments
+ /* outer tap adjustments */
vp8_filter += 1;
vp8_filter >>= 1;
vp8_filter &= ~hev;
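
The +4 / +3 comment above is worth a concrete check: adding 4 before the >>3 rounds the q0-side adjustment up while adding 3 rounds the p0-side down, keeping the two adjustments within one of each other. A small table generator (this assumes arithmetic right shift for negative operands, as the libvpx code itself does):

    #include <stdio.h>

    int main(void)
    {
        int f;
        for (f = -8; f <= 8; f++)
        {
            int Filter1 = (f + 4) >> 3;   /* applied to q0 */
            int Filter2 = (f + 3) >> 3;   /* applied to p0 */
            printf("f=%3d  q0 -= %2d  p0 += %2d\n", f, Filter1, Filter2);
        }
        return 0;
    }
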
@@ -102,19 +97,20 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc
void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
- int p, //pitch
+ int p, /* pitch */
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
int count
)
{
- int hev = 0; // high edge variance
+ int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
@@ -140,12 +136,13 @@ void vp8_loop_filter_vertical_edge_c
int count
)
{
- int hev = 0; // high edge variance
+ int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
@@ -160,7 +157,7 @@ void vp8_loop_filter_vertical_edge_c
while (++i < count * 8);
}
-__inline void vp8_mbfilter(signed char mask, signed char hev,
+static __inline void vp8_mbfilter(signed char mask, signed char hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
@@ -172,7 +169,7 @@ __inline void vp8_mbfilter(signed char mask, signed char hev,
signed char qs1 = (signed char) * oq1 ^ 0x80;
signed char qs2 = (signed char) * oq2 ^ 0x80;
- // add outer taps if we have high edge variance
+ /* add outer taps if we have high edge variance */
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
vp8_filter &= mask;
@@ -180,7 +177,7 @@ __inline void vp8_mbfilter(signed char mask, signed char hev,
Filter2 = vp8_filter;
Filter2 &= hev;
- // save bottom 3 bits so that we round one side +4 and the other +3
+ /* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
Filter1 >>= 3;
@@ -189,25 +186,25 @@ __inline void vp8_mbfilter(signed char mask, signed char hev,
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
- // only apply wider filter if not high edge variance
+ /* only apply wider filter if not high edge variance */
vp8_filter &= ~hev;
Filter2 = vp8_filter;
- // roughly 3/7th difference across boundary
+ /* roughly 3/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
s = vp8_signed_char_clamp(qs0 - u);
*oq0 = s ^ 0x80;
s = vp8_signed_char_clamp(ps0 + u);
*op0 = s ^ 0x80;
- // roughly 2/7th difference across boundary
+ /* roughly 2/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
s = vp8_signed_char_clamp(qs1 - u);
*oq1 = s ^ 0x80;
s = vp8_signed_char_clamp(ps1 + u);
*op1 = s ^ 0x80;
- // roughly 1/7th difference across boundary
+ /* roughly 1/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
s = vp8_signed_char_clamp(qs2 - u);
*oq2 = s ^ 0x80;
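
The 27, 18 and 9 weights above scale the filter value by w/128 with rounding (u = (63 + f*w) >> 7); because u is subtracted from one side of the edge and added to the other, the net across-edge change is about 2w/128, i.e. roughly 3/7, 2/7 and 1/7 as the comments say. A quick numeric check with an invented filter value:

    #include <stdio.h>

    int main(void)
    {
        const int weights[3] = { 27, 18, 9 };
        int f = 60, i;                /* example filter value */
        for (i = 0; i < 3; i++)
        {
            int u = (63 + f * weights[i]) >> 7;
            printf("w=%2d  u=%2d  across-edge change=%2d\n",
                   weights[i], u, 2 * u);
        }
        return 0;
    }
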
@@ -225,12 +222,13 @@ void vp8_mbloop_filter_horizontal_edge_c
int count
)
{
- signed char hev = 0; // high edge variance
+ signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
+ /* loop filter designed to work using chars so that we can make maximum use
+ * of 8 bit simd instructions.
+ */
do
{
@@ -259,7 +257,7 @@ void vp8_mbloop_filter_vertical_edge_c
int count
)
{
- signed char hev = 0; // high edge variance
+ signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
@@ -279,21 +277,18 @@ void vp8_mbloop_filter_vertical_edge_c
}
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
{
-// Why does this cause problems for win32?
-// error C2143: syntax error : missing ';' before 'type'
-// (void) limit;
-#ifndef NEW_LOOPFILTER_MASK
- signed char mask = (abs(p0 - q0) <= flimit) * -1;
-#else
+/* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ * (void) limit;
+ */
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
-#endif
return mask;
}
-__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char vp8_filter, Filter1, Filter2;
signed char p1 = (signed char) * op1 ^ 0x80;
@@ -306,7 +301,7 @@ __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
vp8_filter &= mask;
- // save bottom 3 bits so that we round one side +4 and the other +3
+ /* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
Filter1 >>= 3;
u = vp8_signed_char_clamp(q0 - Filter1);
@@ -334,7 +329,7 @@ void vp8_loop_filter_simple_horizontal_edge_c
do
{
- //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
+ /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
@@ -358,7 +353,7 @@ void vp8_loop_filter_simple_vertical_edge_c
do
{
- //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
+ /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h
index 97bffc776..4b8ee5877 100644
--- a/vp8/common/mac_specs.h
+++ b/vp8/common/mac_specs.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index a7e0ce99a..af55e2fe0 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,7 +14,7 @@
typedef enum
{
PRED = 0,
- DEST = 1,
+ DEST = 1
} BLOCKSET;
void vp8_setup_block
@@ -61,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
v = &x->pre.v_buffer;
}
- for (block = 0; block < 16; block++) // y blocks
+ for (block = 0; block < 16; block++) /* y blocks */
{
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
}
- for (block = 16; block < 20; block++) // U and V blocks
+ for (block = 16; block < 20; block++) /* U and V blocks */
{
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
@@ -122,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
void vp8_build_block_doffsets(MACROBLOCKD *x)
{
- // handle the destination pitch features
+ /* handle the destination pitch features */
vp8_setup_macroblock(x, DEST);
vp8_setup_macroblock(x, PRED);
}
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index 9301a2567..86a74bc0f 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,27 +14,27 @@
const int vp8_mode_contexts[6][4] =
{
{
- // 0
+ /* 0 */
7, 1, 1, 143,
},
{
- // 1
+ /* 1 */
14, 18, 14, 107,
},
{
- // 2
+ /* 2 */
135, 64, 57, 68,
},
{
- // 3
+ /* 3 */
60, 56, 128, 65,
},
{
- // 4
+ /* 4 */
159, 134, 128, 34,
},
{
- // 5
+ /* 5 */
234, 188, 128, 28,
},
};
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 0c57651ed..24db88295 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index ceee74c70..a31a561c8 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,133 +14,133 @@
const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
{
{
- //Above Mode : 0
- { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, // left_mode 0
- { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, // left_mode 1
- { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, // left_mode 2
- { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, // left_mode 3
- { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, // left_mode 4
- { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, // left_mode 5
- { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, // left_mode 6
- { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, // left_mode 7
- { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, // left_mode 8
- { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, // left_mode 9
+ /*Above Mode : 0*/
+ { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */
+ { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */
+ { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */
+ { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */
+ { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */
+ { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */
+ { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */
+ { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */
+ { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */
+ { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */
},
{
- //Above Mode : 1
- { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, // left_mode 0
- { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, // left_mode 1
- { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, // left_mode 2
- { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, // left_mode 3
- { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, // left_mode 4
- { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, // left_mode 5
- { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, // left_mode 6
- { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, // left_mode 7
- { 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, // left_mode 8
- { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, // left_mode 9
+ /*Above Mode : 1*/
+ { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */
+ { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */
+ { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */
+ { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */
+ { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */
+ { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */
+ { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */
+ { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */
+ { 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */
+ { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */
},
{
- //Above Mode : 2
- { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, // left_mode 0
- { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, // left_mode 1
- { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, // left_mode 2
- { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, // left_mode 3
- { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, // left_mode 4
- { 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, // left_mode 5
- { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, // left_mode 6
- { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, // left_mode 7
- { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, // left_mode 8
- { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, // left_mode 9
+ /*Above Mode : 2*/
+ { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */
+ { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */
+ { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */
+ { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */
+ { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */
+ { 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, /* left_mode 5 */
+ { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */
+ { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */
+ { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */
+ { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */
},
{
- //Above Mode : 3
- { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, // left_mode 0
- { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, // left_mode 1
- { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, // left_mode 2
- { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, // left_mode 3
- { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, // left_mode 4
- { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, // left_mode 5
- { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, // left_mode 6
- { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, // left_mode 7
- { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, // left_mode 8
- { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, // left_mode 9
+ /*Above Mode : 3*/
+ { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */
+ { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */
+ { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */
+ { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */
+ { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */
+ { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */
+ { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */
+ { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */
+ { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */
+ { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */
},
{
- //Above Mode : 4
- { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, // left_mode 0
- { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, // left_mode 1
- { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, // left_mode 2
- { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, // left_mode 3
- { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, // left_mode 4
- { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, // left_mode 5
- { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, // left_mode 6
- { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, // left_mode 7
- { 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, // left_mode 8
- { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, // left_mode 9
+ /*Above Mode : 4*/
+ { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */
+ { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */
+ { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */
+ { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */
+ { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */
+ { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */
+ { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */
+ { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */
+ { 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */
+ { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */
},
{
- //Above Mode : 5
- { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, // left_mode 0
- { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, // left_mode 1
- { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, // left_mode 2
- { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, // left_mode 3
- { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, // left_mode 4
- { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, // left_mode 5
- { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, // left_mode 6
- { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, // left_mode 7
- { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, // left_mode 8
- { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, // left_mode 9
+ /*Above Mode : 5*/
+ { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */
+ { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */
+ { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */
+ { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */
+ { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */
+ { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */
+ { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */
+ { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */
+ { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */
+ { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */
},
{
- //Above Mode : 6
- { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, // left_mode 0
- { 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, // left_mode 1
- { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, // left_mode 2
- { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, // left_mode 3
- { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, // left_mode 4
- { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, // left_mode 5
- { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, // left_mode 6
- { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, // left_mode 7
- { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, // left_mode 8
- { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, // left_mode 9
+ /*Above Mode : 6*/
+ { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */
+ { 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */
+ { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */
+ { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */
+ { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */
+ { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */
+ { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */
+ { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */
+ { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */
+ { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */
},
{
- //Above Mode : 7
- { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, // left_mode 0
- { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, // left_mode 1
- { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, // left_mode 2
- { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, // left_mode 3
- { 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, // left_mode 4
- { 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, // left_mode 5
- { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, // left_mode 6
- { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, // left_mode 7
- { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, // left_mode 8
- { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, // left_mode 9
+ /*Above Mode : 7*/
+ { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */
+ { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */
+ { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */
+ { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */
+ { 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, /* left_mode 4 */
+ { 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */
+ { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */
+ { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */
+ { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */
+ { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */
},
{
- //Above Mode : 8
- { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, // left_mode 0
- { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, // left_mode 1
- { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, // left_mode 2
- { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, // left_mode 3
- { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, // left_mode 4
- { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, // left_mode 5
- { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, // left_mode 6
- { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, // left_mode 7
- { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, // left_mode 8
- { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, // left_mode 9
+ /*Above Mode : 8*/
+ { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */
+ { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */
+ { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */
+ { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */
+ { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */
+ { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */
+ { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */
+ { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */
+ { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */
+ { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */
},
{
- //Above Mode : 9
- { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, // left_mode 0
- { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, // left_mode 1
- { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, // left_mode 2
- { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, // left_mode 3
- { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, // left_mode 4
- { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, // left_mode 5
- { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, // left_mode 6
- { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, // left_mode 7
- { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, // left_mode 8
- { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, // left_mode 9
+ /*Above Mode : 9*/
+ { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */
+ { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */
+ { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */
+ { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */
+ { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */
+ { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */
+ { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */
+ { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */
+ { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */
+ { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */
},
};
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index 3d8418108..73c91b9e7 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 428721996..a006306db 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 94632dac9..f60b0f3f5 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,9 +21,9 @@
#include "recon.h"
#include "postproc.h"
-//#ifdef PACKET_TESTING
+/*#ifdef PACKET_TESTING*/
#include "header.h"
-//#endif
+/*#endif*/
/* Create/destroy static data structures. */
@@ -32,6 +33,7 @@ void vp8_initialize_common(void);
#define MAXQ 127
#define QINDEX_RANGE (MAXQ + 1)
+#define NUM_YV12_BUFFERS 4
typedef struct frame_contexts
{
@@ -41,7 +43,7 @@ typedef struct frame_contexts
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
MV_CONTEXT mvc[2];
- MV_CONTEXT pre_mvc[2]; //not to caculate the mvcost for the frame if mvc doesn't change.
+ MV_CONTEXT pre_mvc[2]; /* used to avoid recalculating the mvcost for the frame if mvc doesn't change. */
} FRAME_CONTEXT;
typedef enum
@@ -72,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
vp8_subpix_rtcd_vtable_t subpix;
vp8_loopfilter_rtcd_vtable_t loopfilter;
vp8_postproc_rtcd_vtable_t postproc;
+ int flags;
#else
int unused;
#endif
@@ -81,9 +84,9 @@ typedef struct VP8Common
{
struct vpx_internal_error_info error;
- DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
+ DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
int Width;
int Height;
@@ -93,15 +96,16 @@ typedef struct VP8Common
YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
- YV12_BUFFER_CONFIG last_frame;
- YV12_BUFFER_CONFIG golden_frame;
- YV12_BUFFER_CONFIG alt_ref_frame;
- YV12_BUFFER_CONFIG new_frame;
YV12_BUFFER_CONFIG *frame_to_show;
+
+ YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
+ int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
+ int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
- FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skiped.
+ FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
FRAME_TYPE frame_type;
int show_frame;
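
The struct change above replaces the four named YV12 buffers with a pool of NUM_YV12_BUFFERS frames plus per-buffer reference counts, so "refreshing" a reference frame becomes index bookkeeping instead of copying pixels. A self-contained sketch of that scheme; the sk_-prefixed names are inventions of this note, not the libvpx API:

    #include <stdio.h>

    #define SK_NUM_YV12_BUFFERS 4

    static int sk_fb_ref_cnt[SK_NUM_YV12_BUFFERS];

    static int sk_get_free_fb(void)
    {
        int i;
        for (i = 0; i < SK_NUM_YV12_BUFFERS; i++)
        {
            if (sk_fb_ref_cnt[i] == 0)
            {
                sk_fb_ref_cnt[i]++;   /* caller now holds a reference */
                return i;
            }
        }
        return -1;                    /* no free buffer available */
    }

    static void sk_ref_cnt_fb(int *idx, int new_idx)
    {
        if (sk_fb_ref_cnt[*idx] > 0)
            sk_fb_ref_cnt[*idx]--;    /* drop the old reference */
        *idx = new_idx;
        sk_fb_ref_cnt[new_idx]++;     /* take the new reference */
    }

    int main(void)
    {
        int new_fb = sk_get_free_fb();   /* decode into this buffer      */
        int lst_fb = sk_get_free_fb();   /* current "last frame" buffer  */
        sk_ref_cnt_fb(&lst_fb, new_fb);  /* refresh_last_frame: swap idx */
        printf("last frame now buffer %d\n", lst_fb);
        return 0;
    }
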
@@ -112,7 +116,7 @@ typedef struct VP8Common
int mb_cols;
int mode_info_stride;
- // prfile settings
+ /* profile settings */
int experimental;
int mb_no_coeff_skip;
int no_lpf;
@@ -121,7 +125,7 @@ typedef struct VP8Common
int full_pixel;
int base_qindex;
- int last_kf_gf_q; // Q used on the last GF or KF
+ int last_kf_gf_q; /* Q used on the last GF or KF */
int y1dc_delta_q;
int y2dc_delta_q;
@@ -131,8 +135,6 @@ typedef struct VP8Common
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
- unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
- int gf_active_count;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
@@ -153,31 +155,31 @@ typedef struct VP8Common
int last_sharpness_level;
int sharpness_level;
- int refresh_last_frame; // Two state 0 = NO, 1 = YES
- int refresh_golden_frame; // Two state 0 = NO, 1 = YES
- int refresh_alt_ref_frame; // Two state 0 = NO, 1 = YES
+ int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
+ int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
+ int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
- int copy_buffer_to_gf; // 0 none, 1 Last to GF, 2 ARF to GF
- int copy_buffer_to_arf; // 0 none, 1 Last to ARF, 2 GF to ARF
+ int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
+ int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
- int refresh_entropy_probs; // Two state 0 = NO, 1 = YES
+ int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
- int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
+ int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
- // Y,U,V,Y2
- ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
- ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts ""
+ /* Y,U,V,Y2 */
+ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
+ ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
- // keyframe block modes are predicted by their above, left neighbors
+ /* keyframe block modes are predicted by their above, left neighbors */
vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */
vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
- FRAME_CONTEXT lfc; // last frame entropy
- FRAME_CONTEXT fc; // this frame entropy
+ FRAME_CONTEXT lfc; /* last frame entropy */
+ FRAME_CONTEXT fc; /* this frame entropy */
unsigned int current_video_frame;
@@ -201,6 +203,7 @@ typedef struct VP8Common
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
void vp8_init_loop_filter(VP8_COMMON *cm);
+void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
#endif
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 644c0ec77..00a97d97d 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h
index 32a55ee6c..115134a53 100644
--- a/vp8/common/partialgfupdate.h
+++ b/vp8/common/partialgfupdate.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 0979185d6..e797e1036 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -18,7 +19,53 @@
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
-// global constants
+
+#define RGB_TO_YUV(t) \
+ ( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16), \
+ (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
+ ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
+
+/* global constants */
+
+static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
+{
+ { RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
+ { RGB_TO_YUV(0x00FF00) }, /* Green */
+ { RGB_TO_YUV(0xADFF2F) }, /* GreenYellow */
+ { RGB_TO_YUV(0x228B22) }, /* ForestGreen */
+ { RGB_TO_YUV(0x006400) }, /* DarkGreen */
+ { RGB_TO_YUV(0x98F5FF) }, /* Cadet Blue */
+ { RGB_TO_YUV(0x6CA6CD) }, /* Sky Blue */
+ { RGB_TO_YUV(0x00008B) }, /* Dark blue */
+ { RGB_TO_YUV(0x551A8B) }, /* Purple */
+ { RGB_TO_YUV(0xFF0000) } /* Red */
+};
+
+static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
+{
+ { RGB_TO_YUV(0x6633ff) }, /* Purple */
+ { RGB_TO_YUV(0xcc33ff) }, /* Magenta */
+ { RGB_TO_YUV(0xff33cc) }, /* Pink */
+ { RGB_TO_YUV(0xff3366) }, /* Coral */
+ { RGB_TO_YUV(0x3366ff) }, /* Blue */
+ { RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
+ { RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
+ { RGB_TO_YUV(0xff6633) }, /* Orange */
+ { RGB_TO_YUV(0x33ccff) }, /* Light Blue */
+ { RGB_TO_YUV(0x8ab800) }, /* Green */
+ { RGB_TO_YUV(0xffcc33) }, /* Light Orange */
+ { RGB_TO_YUV(0x33ffcc) }, /* Aqua */
+ { RGB_TO_YUV(0x66ff33) }, /* Light Green */
+ { RGB_TO_YUV(0xccff33) }, /* Yellow */
+};
+
+static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
+{
+ { RGB_TO_YUV(0x00ff00) }, /* Green */
+ { RGB_TO_YUV(0x0000ff) }, /* Blue */
+ { RGB_TO_YUV(0xffff00) }, /* Yellow */
+ { RGB_TO_YUV(0xff0000) }, /* Red */
+};
static const short kernel5[] =
{
@@ -75,7 +122,7 @@ const short vp8_rv[] =
extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
-
+extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
/***********************************************************************************************************
*/
void vp8_post_proc_down_and_across_c
@@ -100,7 +147,7 @@ void vp8_post_proc_down_and_across_c
for (row = 0; row < rows; row++)
{
- // post_proc_down for one row
+ /* post_proc_down for one row */
p_src = src_ptr;
p_dst = dst_ptr;
@@ -123,7 +170,7 @@ void vp8_post_proc_down_and_across_c
p_dst[col] = v;
}
- // now post_proc_across
+ /* now post_proc_across */
p_src = dst_ptr;
p_dst = dst_ptr;
@@ -152,12 +199,12 @@ void vp8_post_proc_down_and_across_c
p_dst[col-2] = d[(col-2)&7];
}
- //handle the last two pixels
+ /* handle the last two pixels */
p_dst[col-2] = d[(col-2)&7];
p_dst[col-1] = d[(col-1)&7];
- //next row
+ /* next row */
src_ptr += pitch;
dst_ptr += pitch;
}
@@ -329,13 +376,6 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source,
}
-
-//Notes: It is better to change CHAR to unsigned or signed to
-//avoid error on ARM platform.
-char vp8_an[8][64][3072];
-int vp8_cd[8][64];
-
-
double vp8_gaussian(double sigma, double mu, double x)
{
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
@@ -357,9 +397,9 @@ static void fillrd(struct postproc_state *state, int q, int a)
sigma = ai + .5 + .6 * (63 - qi) / 63.0;
- // set up a lookup table of 256 entries that matches
- // a gaussian distribution with sigma determined by q.
- //
+ /* set up a lookup table of 256 entries that matches
+ * a gaussian distribution with sigma determined by q.
+ */
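+ /* Worked example (illustrative): with ai = 2 and qi = 32 this gives
+ * sigma = 2 + .5 + .6*(63-32)/63 ~= 2.795; sigma spans [ai+0.5, ai+1.1]
+ * across the qi range of 0..63.
+ */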
{
double i;
int next, j;
@@ -450,6 +490,187 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
}
}
+/* Blend the macro block with a solid colored square. Leave the
+ * edges unblended to give distinction to macro blocks in areas
+ * filled with the same color block.
+ */
+void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
+ int y1, int u1, int v1, int alpha, int stride)
+{
+ int i, j;
+ int y1_const = y1*((1<<16)-alpha);
+ int u1_const = u1*((1<<16)-alpha);
+ int v1_const = v1*((1<<16)-alpha);
+
+ y += 2*stride + 2;
+ for (i = 0; i < 12; i++)
+ {
+ for (j = 0; j < 12; j++)
+ {
+ y[j] = (y[j]*alpha + y1_const)>>16;
+ }
+ y += stride;
+ }
+
+ stride >>= 1;
+
+ u += stride + 1;
+ v += stride + 1;
+
+ for (i = 0; i < 6; i++)
+ {
+ for (j = 0; j < 6; j++)
+ {
+ u[j] = (u[j]*alpha + u1_const)>>16;
+ v[j] = (v[j]*alpha + v1_const)>>16;
+ }
+ u += stride;
+ v += stride;
+ }
+}
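+
+/* Worked example (illustrative): the blend is 16.16 fixed point, i.e.
+ * result = (pix*alpha + overlay*(65536-alpha)) >> 16. With alpha = 0xc000
+ * (the value the callers below pass), a pixel of 100 blended toward an
+ * overlay of 200 gives (100*49152 + 200*16384) >> 16 = 125, a 75/25 mix
+ * in favor of the original pixel.
+ */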
+
+/* Blend only the edge of the macro block. Leave center
+ * unblended to allow for other visualizations to be layered.
+ */
+void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
+ int y1, int u1, int v1, int alpha, int stride)
+{
+ int i, j;
+ int y1_const = y1*((1<<16)-alpha);
+ int u1_const = u1*((1<<16)-alpha);
+ int v1_const = v1*((1<<16)-alpha);
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 16; j++)
+ {
+ y[j] = (y[j]*alpha + y1_const)>>16;
+ }
+ y += stride;
+ }
+
+ for (i = 0; i < 12; i++)
+ {
+ y[0] = (y[0]*alpha + y1_const)>>16;
+ y[1] = (y[1]*alpha + y1_const)>>16;
+ y[14] = (y[14]*alpha + y1_const)>>16;
+ y[15] = (y[15]*alpha + y1_const)>>16;
+ y += stride;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 16; j++)
+ {
+ y[j] = (y[j]*alpha + y1_const)>>16;
+ }
+ y += stride;
+ }
+
+ stride >>= 1;
+
+ for (j = 0; j < 8; j++)
+ {
+ u[j] = (u[j]*alpha + u1_const)>>16;
+ v[j] = (v[j]*alpha + v1_const)>>16;
+ }
+ u += stride;
+ v += stride;
+
+ for (i = 0; i < 6; i++)
+ {
+ u[0] = (u[0]*alpha + u1_const)>>16;
+ v[0] = (v[0]*alpha + v1_const)>>16;
+
+ u[7] = (u[7]*alpha + u1_const)>>16;
+ v[7] = (v[7]*alpha + v1_const)>>16;
+
+ u += stride;
+ v += stride;
+ }
+
+ for (j = 0; j < 8; j++)
+ {
+ u[j] = (u[j]*alpha + u1_const)>>16;
+ v[j] = (v[j]*alpha + v1_const)>>16;
+ }
+}
+
+void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
+ int y1, int u1, int v1, int alpha, int stride)
+{
+ int i, j;
+ int y1_const = y1*((1<<16)-alpha);
+ int u1_const = u1*((1<<16)-alpha);
+ int v1_const = v1*((1<<16)-alpha);
+
+ for (i = 0; i < 4; i++)
+ {
+ for (j = 0; j < 4; j++)
+ {
+ y[j] = (y[j]*alpha + y1_const)>>16;
+ }
+ y += stride;
+ }
+
+ stride >>= 1;
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ u[j] = (u[j]*alpha + u1_const)>>16;
+ v[j] = (v[j]*alpha + v1_const)>>16;
+ }
+ u += stride;
+ v += stride;
+ }
+}
+
+static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
+{
+ int dx;
+ int dy;
+
+ if (*x1 > width)
+ {
+ dx = *x1 - x0;
+ dy = *y1 - y0;
+
+ *x1 = width;
+ if (dx)
+ *y1 = ((width-x0)*dy)/dx + y0;
+ }
+ if (*x1 < 0)
+ {
+ dx = *x1 - x0;
+ dy = *y1 - y0;
+
+ *x1 = 0;
+ if (dx)
+ *y1 = ((0-x0)*dy)/dx + y0;
+ }
+ if (*y1 > height)
+ {
+ dx = *x1 - x0;
+ dy = *y1 - y0;
+
+ *y1 = height;
+ if (dy)
+ *x1 = ((height-y0)*dx)/dy + x0;
+ }
+ if (*y1 < 0)
+ {
+ dx = *x1 - x0;
+ dy = *y1 - y0;
+
+ *y1 = 0;
+ if (dy)
+ *x1 = ((0-y0)*dx)/dy + x0;
+ }
+}
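+
+/* Worked example (illustrative): constrain_line clips the far endpoint by
+ * similar triangles. For a segment from (10,10) to (30,20) in a frame of
+ * width 20, dx = 20 and dy = 10, so *x1 becomes 20 and
+ * *y1 = ((20-10)*10)/20 + 10 = 15: the segment keeps its slope but stops
+ * at the frame edge.
+ */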
+
+
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
#else
@@ -471,7 +692,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
{
*dest = *oci->frame_to_show;
- // handle problem with extending borders
+ /* handle problem with extending borders */
dest->y_width = oci->Width;
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;
@@ -527,7 +748,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
oci->mb_cols, oci->mb_rows);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}
- else if (flags & VP8D_DEBUG_LEVEL2)
+
+ if (flags & VP8D_DEBUG_LEVEL2)
{
int i, j;
unsigned char *y_ptr;
@@ -539,7 +761,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@@ -553,12 +775,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
- mb_index ++; //border
+ mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
}
- else if (flags & VP8D_DEBUG_LEVEL3)
+
+ if (flags & VP8D_DEBUG_LEVEL3)
{
int i, j;
unsigned char *y_ptr;
@@ -570,7 +793,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@@ -587,12 +810,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
- mb_index ++; //border
+ mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
}
- else if (flags & VP8D_DEBUG_LEVEL4)
+
+ if (flags & VP8D_DEBUG_LEVEL4)
{
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
@@ -607,7 +831,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
- // vp8_filter each macro block
+ /* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
@@ -620,7 +844,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
y_ptr += 16;
}
- mb_index ++; //border
+ mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
@@ -629,11 +853,261 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
+ /* Draw motion vectors */
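+ /* Note (illustrative): mv.as_mv is stored in eighth-pel units here, so
+ * the (mv->col >> 3, mv->row >> 3) terms below convert each vector to
+ * whole pixels before the line is drawn from the (sub)block centre.
+ */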
+ if (flags & VP8D_DEBUG_DRAW_MV)
+ {
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ int mb_cols = width >> 4;
+ unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+ int x0, y0;
+
+ for (y0 = 0; y0 < height; y0 += 16)
+ {
+ for (x0 = 0; x0 < width; x0 += 16)
+ {
+ int x1, y1;
+
+ if (mi->mbmi.mode == SPLITMV)
+ {
+ switch (mi->mbmi.partitioning)
+ {
+ case 0 : /* mv_top_bottom */
+ {
+ B_MODE_INFO *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 8 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line (x0+8, &x1, y0+4, &y1, width, height);
+ vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[8];
+ mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 8 + (mv->col >> 3);
+ y1 = y0 +12 + (mv->row >> 3);
+
+ constrain_line (x0+8, &x1, y0+12, &y1, width, height);
+ vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
+
+ break;
+ }
+ case 1 : /* mv_left_right */
+ {
+ B_MODE_INFO *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 + 8 + (mv->row >> 3);
+
+ constrain_line (x0+4, &x1, y0+8, &y1, width, height);
+ vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[2];
+ mv = &bmi->mv.as_mv;
+
+ x1 = x0 +12 + (mv->col >> 3);
+ y1 = y0 + 8 + (mv->row >> 3);
+
+ constrain_line (x0+12, &x1, y0+8, &y1, width, height);
+ vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
+
+ break;
+ }
+ case 2 : /* mv_quarters */
+ {
+ B_MODE_INFO *bmi = &mi->bmi[0];
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line (x0+4, &x1, y0+4, &y1, width, height);
+ vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[2];
+ mv = &bmi->mv.as_mv;
+
+ x1 = x0 +12 + (mv->col >> 3);
+ y1 = y0 + 4 + (mv->row >> 3);
+
+ constrain_line (x0+12, &x1, y0+4, &y1, width, height);
+ vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[8];
+ mv = &bmi->mv.as_mv;
+
+ x1 = x0 + 4 + (mv->col >> 3);
+ y1 = y0 +12 + (mv->row >> 3);
+
+ constrain_line (x0+4, &x1, y0+12, &y1, width, height);
+ vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
+
+ bmi = &mi->bmi[10];
+ mv = &bmi->mv.as_mv;
+
+ x1 = x0 +12 + (mv->col >> 3);
+ y1 = y0 +12 + (mv->row >> 3);
+
+ constrain_line (x0+12, &x1, y0+12, &y1, width, height);
+ vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
+ break;
+ }
+ default :
+ {
+ B_MODE_INFO *bmi = mi->bmi;
+ int bx0, by0;
+ for (by0 = y0; by0 < (y0+16); by0 += 4)
+ {
+ for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
+ {
+ MV *mv = &bmi->mv.as_mv;
+
+ x1 = bx0 + 2 + (mv->col >> 3);
+ y1 = by0 + 2 + (mv->row >> 3);
+
+ constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
+ vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
+
+ bmi++;
+ }
+ }
+ }
+ }
+ }
+ else if (mi->mbmi.mode >= NEARESTMV)
+ {
+ MV *mv = &mi->mbmi.mv.as_mv;
+ const int lx0 = x0 + 8;
+ const int ly0 = y0 + 8;
+
+ x1 = lx0 + (mv->col >> 3);
+ y1 = ly0 + (mv->row >> 3);
+
+ if (x1 != lx0 && y1 != ly0)
+ {
+ constrain_line (lx0, &x1, ly0-1, &y1, width, height);
+ vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
+
+ constrain_line (lx0, &x1, ly0+1, &y1, width, height);
+ vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
+ }
+ else
+ vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
+ }
+ mi++;
+ }
+ mi++;
+ }
+ }
+
+ /* Color in block modes */
+ if (flags & VP8D_DEBUG_CLR_BLK_MODES)
+ {
+ int y, x;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+ unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+ unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+
+ for (y = 0; y < height; y += 16)
+ {
+ for (x = 0; x < width; x += 16)
+ {
+ int Y = 0, U = 0, V = 0;
+
+ if (mi->mbmi.mode == B_PRED)
+ {
+ int by, bx;
+ unsigned char *yl, *ul, *vl;
+ B_MODE_INFO *bmi = mi->bmi;
+
+ yl = y_ptr + x;
+ ul = u_ptr + (x>>1);
+ vl = v_ptr + (x>>1);
+
+ for (by = 0; by < 16; by += 4)
+ {
+ for (bx = 0; bx < 16; bx += 4)
+ {
+ Y = B_PREDICTION_MODE_colors[bmi->mode][0];
+ U = B_PREDICTION_MODE_colors[bmi->mode][1];
+ V = B_PREDICTION_MODE_colors[bmi->mode][2];
+
+ POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
+ (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
+
+ bmi++;
+ }
+
+ yl += y_stride*4;
+ ul += y_stride*1;
+ vl += y_stride*1;
+ }
+ }
+ else
+ {
+ Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
+ U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
+ V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+
+ POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
+ (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+ }
+ mi++;
+ }
+ y_ptr += y_stride*16;
+ u_ptr += y_stride*4;
+ v_ptr += y_stride*4;
+
+ mi++;
+ }
+ }
+
+ /* Color in frame reference blocks */
+ if (flags & VP8D_DEBUG_CLR_FRM_REF_BLKS)
+ {
+ int y, x;
+ YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+ int width = post->y_width;
+ int height = post->y_height;
+ unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+ unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+ unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+ int y_stride = oci->post_proc_buffer.y_stride;
+ MODE_INFO *mi = oci->mi;
+
+ for (y = 0; y < height; y += 16)
+ {
+ for (x = 0; x < width; x +=16)
+ {
+ int Y = 0, U = 0, V = 0;
+
+ Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
+ U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
+ V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
+
+ POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
+ (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+
+ mi++;
+ }
+ y_ptr += y_stride*16;
+ u_ptr += y_stride*4;
+ v_ptr += y_stride*4;
+
+ mi++;
+ }
+ }
*dest = oci->post_proc_buffer;
- // handle problem with extending borders
+ /* handle problem with extending borders */
dest->y_width = oci->Width;
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index cd99056b0..7485135bf 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,6 +24,18 @@
char whiteclamp[16], char bothclamp[16],\
unsigned int w, unsigned int h, int pitch)
+#define prototype_postproc_blend_mb_inner(sym)\
+ void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+ int y1, int u1, int v1, int alpha, int stride)
+
+#define prototype_postproc_blend_mb_outer(sym)\
+ void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+ int y1, int u1, int v1, int alpha, int stride)
+
+#define prototype_postproc_blend_b(sym)\
+ void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+ int y1, int u1, int v1, int alpha, int stride)
+
#if ARCH_X86 || ARCH_X86_64
#include "x86/postproc_x86.h"
#endif
@@ -47,16 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
#endif
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
+#ifndef vp8_postproc_blend_mb_inner
+#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
+#endif
+extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
+
+#ifndef vp8_postproc_blend_mb_outer
+#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
+#endif
+extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
+
+#ifndef vp8_postproc_blend_b
+#define vp8_postproc_blend_b vp8_blend_b_c
+#endif
+extern prototype_postproc_blend_b(vp8_postproc_blend_b);
typedef prototype_postproc((*vp8_postproc_fn_t));
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
+typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
+typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
+typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
typedef struct
{
- vp8_postproc_inplace_fn_t down;
- vp8_postproc_inplace_fn_t across;
- vp8_postproc_fn_t downacross;
- vp8_postproc_addnoise_fn_t addnoise;
+ vp8_postproc_inplace_fn_t down;
+ vp8_postproc_inplace_fn_t across;
+ vp8_postproc_fn_t downacross;
+ vp8_postproc_addnoise_fn_t addnoise;
+ vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
+ vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
+ vp8_postproc_blend_b_fn_t blend_b;
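+
+ /* Illustrative: with CONFIG_RUNTIME_CPU_DETECT enabled,
+ * POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b) dispatches through this
+ * table; otherwise the macro collapses to the vp8_postproc_blend_b
+ * default (vp8_blend_b_c) at compile time.
+ */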
} vp8_postproc_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm
index e87eb2112..a4ce91583 100644
--- a/vp8/common/ppc/copy_altivec.asm
+++ b/vp8/common/ppc/copy_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm
index 2a3550773..4da2e94f9 100644
--- a/vp8/common/ppc/filter_altivec.asm
+++ b/vp8/common/ppc/filter_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm
index 27e02a87f..fd8aa665f 100644
--- a/vp8/common/ppc/filter_bilinear_altivec.asm
+++ b/vp8/common/ppc/filter_bilinear_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm
index e88af8d7d..117d9cfc8 100644
--- a/vp8/common/ppc/idctllm_altivec.asm
+++ b/vp8/common/ppc/idctllm_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c
index 586eed477..bad3cf3bd 100644
--- a/vp8/common/ppc/loopfilter_altivec.c
+++ b/vp8/common/ppc/loopfilter_altivec.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm
index 78a5cf9b3..61df4e976 100644
--- a/vp8/common/ppc/loopfilter_filters_altivec.asm
+++ b/vp8/common/ppc/loopfilter_filters_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm
index 227ef2a94..f81d86f74 100644
--- a/vp8/common/ppc/platform_altivec.asm
+++ b/vp8/common/ppc/platform_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm
index f478b954c..dd39e05a8 100644
--- a/vp8/common/ppc/recon_altivec.asm
+++ b/vp8/common/ppc/recon_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 284731085..1f5d79068 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index c66397682..b8d713cf0 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,14 +13,17 @@
#define __INC_PPFLAGS_H
enum
{
- VP8D_NOFILTERING = 0,
- VP8D_DEBLOCK = 1,
- VP8D_DEMACROBLOCK = 2,
- VP8D_ADDNOISE = 4,
- VP8D_DEBUG_LEVEL1 = 8,
- VP8D_DEBUG_LEVEL2 = 16,
- VP8D_DEBUG_LEVEL3 = 32,
- VP8D_DEBUG_LEVEL4 = 64,
+ VP8D_NOFILTERING = 0,
+ VP8D_DEBLOCK = 1<<0,
+ VP8D_DEMACROBLOCK = 1<<1,
+ VP8D_ADDNOISE = 1<<2,
+ VP8D_DEBUG_LEVEL1 = 1<<3,
+ VP8D_DEBUG_LEVEL2 = 1<<4,
+ VP8D_DEBUG_LEVEL3 = 1<<5,
+ VP8D_DEBUG_LEVEL4 = 1<<6,
+ VP8D_DEBUG_DRAW_MV = 1<<7,
+ VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
+ VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
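+
+ /* Usage sketch (illustrative): the values are bit flags, so a caller
+ * wanting deblocking plus the motion-vector overlay would pass
+ * VP8D_DEBLOCK | VP8D_DEBUG_DRAW_MV (= 0x81) in its ppflags.
+ */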
};
#endif
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index 25a4b776f..99fee5ae2 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c
index df4c96e4a..f315f50e0 100644
--- a/vp8/common/predictdc.c
+++ b/vp8/common/predictdc.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h
index b8871e452..fa8596822 100644
--- a/vp8/common/predictdc.h
+++ b/vp8/common/predictdc.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h
index 00ec9a8d7..0b142bda7 100644
--- a/vp8/common/preproc.h
+++ b/vp8/common/preproc.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h
index 986c45b10..7d554b509 100644
--- a/vp8/common/preprocif.h
+++ b/vp8/common/preprocif.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h
index 1171ede43..c9659902b 100644
--- a/vp8/common/proposed.h
+++ b/vp8/common/proposed.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index 09fe31fe5..e9833fe33 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 0c92ce8b9..cb64d8eb8 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index d1268ea22..d72d6e410 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -105,8 +106,24 @@ void vp8_recon2b_c
}
}
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
+#if ARCH_ARM
+ BLOCKD *b = &x->block[0];
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
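+
+ /* Illustrative: recon4 reconstructs four adjacent 4x4 blocks (one full
+ * 16-pixel-wide row of the macroblock), so each b += 4 below steps to
+ * the next row of luma blocks; the manual unrolling avoids loop
+ * overhead on ARM.
+ */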
+
+ /*b = &x->block[4];*/
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+ /*b = &x->block[8];*/
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+ /*b = &x->block[12];*/
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
int i;
for (i = 0; i < 16; i += 4)
@@ -115,10 +132,36 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
+#endif
}
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
+#if ARCH_ARM
+ BLOCKD *b = &x->block[0];
+
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 4;
+ RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 4;
+
+ /*b = &x->block[16];*/
+
+ RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b++;
+ b++;
+ RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b++;
+ b++;
+ RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b++;
+ b++;
+ RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
int i;
for (i = 0; i < 16; i += 4)
@@ -134,4 +177,5 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
+#endif
}
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index f65a90f7e..1e6e343fc 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -1,21 +1,29 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __INC_RECON_H
#define __INC_RECON_H
+#include "blockd.h"
+
#define prototype_copy_block(sym) \
void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)
#define prototype_recon_block(sym) \
- void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
+ void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch)
+
+#define prototype_recon_macroblock(sym) \
+ void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
+
+struct vp8_recon_rtcd_vtable;
#if ARCH_X86 || ARCH_X86_64
#include "x86/recon_x86.h"
@@ -55,9 +63,20 @@ extern prototype_recon_block(vp8_recon_recon2);
#endif
extern prototype_recon_block(vp8_recon_recon4);
+#ifndef vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mb);
+
+#ifndef vp8_recon_recon_mby
+#define vp8_recon_recon_mby vp8_recon_mby_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mby);
+
typedef prototype_copy_block((*vp8_copy_block_fn_t));
typedef prototype_recon_block((*vp8_recon_fn_t));
-typedef struct
+typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
+typedef struct vp8_recon_rtcd_vtable
{
vp8_copy_block_fn_t copy16x16;
vp8_copy_block_fn_t copy8x8;
@@ -65,6 +84,8 @@ typedef struct
vp8_recon_fn_t recon;
vp8_recon_fn_t recon2;
vp8_recon_fn_t recon4;
+ vp8_recon_mb_fn_t recon_mb;
+ vp8_recon_mb_fn_t recon_mby;
} vp8_recon_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@@ -73,9 +94,6 @@ typedef struct
#define RECON_INVOKE(ctx,fn) vp8_recon_##fn
#endif
-#include "blockd.h"
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
#endif
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index c48886deb..74871c0e8 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -17,9 +18,10 @@
#include "onyxc_int.h"
#endif
-// use this define on systems where unaligned int reads and writes are
-// not allowed, i.e. ARM architectures
-//#define MUST_BE_ALIGNED
+/* use this define on systems where unaligned int reads and writes are
+ * not allowed, i.e. ARM architectures
+ */
+/*#define MUST_BE_ALIGNED*/
static const int bbb[4] = {0, 2, 8, 10};
@@ -209,7 +211,8 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
int i;
- if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+ x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
@@ -253,16 +256,18 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
}
}
-
+/*encoder only*/
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
{
- if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+ if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+ x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = x->predictor;
- int mv_row = x->mbmi.mv.as_mv.row;
- int mv_col = x->mbmi.mv.as_mv.col;
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
@@ -281,7 +286,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
{
int i;
- if (x->mbmi.partitioning < 3)
+ if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
@@ -312,7 +317,9 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
- if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+ if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+ x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
@@ -322,8 +329,8 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
- int mv_row = x->mbmi.mv.as_mv.row;
- int mv_col = x->mbmi.mv.as_mv.col;
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
@@ -360,7 +367,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
int i;
- if (x->mbmi.partitioning < 3)
+ if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
@@ -409,7 +416,7 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
{
int i, j;
- if (x->mbmi.mode == SPLITMV)
+ if (x->mode_info_context->mbmi.mode == SPLITMV)
{
for (i = 0; i < 2; i++)
{
@@ -454,8 +461,8 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
}
else
{
- int mvrow = x->mbmi.mv.as_mv.row;
- int mvcol = x->mbmi.mv.as_mv.col;
+ int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
if (mvrow < 0)
mvrow -= 1;
@@ -485,15 +492,16 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
}
-// The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
-// situation, we can write the result directly to dst buffer instead of writing it to predictor
-// buffer and then copying it to dst buffer.
+/* The following functions are written for skip_recon_mb() to call. Since there is no recon in this
+ * situation, we can write the result directly to dst buffer instead of writing it to predictor
+ * buffer and then copying it to dst buffer.
+ */
static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *ptr_base;
unsigned char *ptr;
- //unsigned char *pred_ptr = d->predictor;
+ /*unsigned char *pred_ptr = d->predictor;*/
int dst_stride = d->dst_stride;
int pre_stride = d->pre_stride;
@@ -529,37 +537,37 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp
void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
- //unsigned char *pred_ptr = x->block[0].predictor;
- //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;
+ /*unsigned char *pred_ptr = x->block[0].predictor;
+ unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
unsigned char *pred_ptr = x->predictor;
unsigned char *dst_ptr = x->dst.y_buffer;
- if (x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *uptr, *vptr;
- //unsigned char *pred_ptr = x->predictor;
- //unsigned char *upred_ptr = &x->predictor[256];
- //unsigned char *vpred_ptr = &x->predictor[320];
+ /*unsigned char *pred_ptr = x->predictor;
+ unsigned char *upred_ptr = &x->predictor[256];
+ unsigned char *vpred_ptr = &x->predictor[320];*/
unsigned char *udst_ptr = x->dst.u_buffer;
unsigned char *vdst_ptr = x->dst.v_buffer;
- int mv_row = x->mbmi.mv.as_mv.row;
- int mv_col = x->mbmi.mv.as_mv.col;
- int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+ int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
- x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+ x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
else
{
- RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
mv_row = x->block[16].bmi.mv.as_mv.row;
@@ -582,16 +590,17 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
}
else
{
- //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
- //if sth is wrong, go back to what it is in build_inter_predictors_mb.
+ /* note: this whole ELSE branch is never executed, so there is no way to
+ * test the correctness of this modification. If something goes wrong
+ * later, revert to the logic in build_inter_predictors_mb.
+ */
int i;
- if (x->mbmi.partitioning < 3)
+ if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
BLOCKD *d = &x->block[bbb[i]];
- //vp8_build_inter_predictors4b(x, d, 16);
+ /*vp8_build_inter_predictors4b(x, d, 16);*/
{
unsigned char *ptr_base;
@@ -603,11 +612,11 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
- x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+ x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
else
{
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
}
}
@@ -621,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
{
- //vp8_build_inter_predictors2b(x, d0, 16);
+ /*vp8_build_inter_predictors2b(x, d0, 16);*/
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = d0->predictor;
@@ -653,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
{
- //vp8_build_inter_predictors2b(x, d0, 8);
+ /*vp8_build_inter_predictors2b(x, d0, 8);*/
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = d0->predictor;
@@ -663,11 +672,15 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
{
- x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
+ x->subpixel_predict8x4(ptr, d0->pre_stride,
+ d0->bmi.mv.as_mv.col & 7,
+ d0->bmi.mv.as_mv.row & 7,
+ dst_ptr, x->dst.uv_stride);
}
else
{
- RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
+ RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
+ d0->pre_stride, dst_ptr, x->dst.uv_stride);
}
}
else
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index b2d1ae97a..7c1dee431 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index e33bce348..9cf5f6a88 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,9 +14,9 @@
#include "reconintra.h"
#include "vpx_mem/vpx_mem.h"
-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
-
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
int i;
@@ -41,8 +42,8 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
}
- // for Y
- switch (x->mbmi.mode)
+ /* for Y */
+ switch (x->mode_info_context->mbmi.mode)
{
case DC_PRED:
{
@@ -155,15 +156,15 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
int r, c, i;
int y_stride = x->dst.y_stride;
- ypred_ptr = x->dst.y_buffer; //x->predictor;
+ ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
for (i = 0; i < 16; i++)
{
yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
}
- // for Y
- switch (x->mbmi.mode)
+ /* for Y */
+ switch (x->mode_info_context->mbmi.mode)
{
case DC_PRED:
{
@@ -203,11 +204,11 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
expected_dc = 128;
}
- //vpx_memset(ypred_ptr, expected_dc, 256);
+ /*vpx_memset(ypred_ptr, expected_dc, 256);*/
for (r = 0; r < 16; r++)
{
vpx_memset(ypred_ptr, expected_dc, 16);
- ypred_ptr += y_stride; //16;
+ ypred_ptr += y_stride; /*16;*/
}
}
break;
@@ -221,7 +222,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
- ypred_ptr += y_stride; //16;
+ ypred_ptr += y_stride; /*16;*/
}
}
break;
@@ -232,7 +233,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
{
vpx_memset(ypred_ptr, yleft_col[r], 16);
- ypred_ptr += y_stride; //16;
+ ypred_ptr += y_stride; /*16;*/
}
}
@@ -255,7 +256,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
ypred_ptr[c] = pred;
}
- ypred_ptr += y_stride; //16;
+ ypred_ptr += y_stride; /*16;*/
}
}
@@ -289,7 +290,7 @@ void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
}
- switch (x->mbmi.uv_mode)
+ switch (x->mode_info_context->mbmi.uv_mode)
{
case DC_PRED:
{
@@ -417,8 +418,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
unsigned char vleft_col[20];
unsigned char vtop_left = vabove_row[-1];
- unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
- unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+ unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+ unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
int uv_stride = x->dst.uv_stride;
int i, j;
@@ -429,7 +430,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
}
- switch (x->mbmi.uv_mode)
+ switch (x->mode_info_context->mbmi.uv_mode)
{
case DC_PRED:
{
@@ -471,14 +472,14 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
}
- //vpx_memset(upred_ptr,expected_udc,64);
- //vpx_memset(vpred_ptr,expected_vdc,64);
+ /*vpx_memset(upred_ptr,expected_udc,64);*/
+ /*vpx_memset(vpred_ptr,expected_vdc,64);*/
for (i = 0; i < 8; i++)
{
vpx_memset(upred_ptr, expected_udc, 8);
vpx_memset(vpred_ptr, expected_vdc, 8);
- upred_ptr += uv_stride; //8;
- vpred_ptr += uv_stride; //8;
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
}
}
break;
@@ -490,8 +491,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
{
vpx_memcpy(upred_ptr, uabove_row, 8);
vpx_memcpy(vpred_ptr, vabove_row, 8);
- upred_ptr += uv_stride; //8;
- vpred_ptr += uv_stride; //8;
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
}
}
@@ -504,8 +505,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
{
vpx_memset(upred_ptr, uleft_col[i], 8);
vpx_memset(vpred_ptr, vleft_col[i], 8);
- upred_ptr += uv_stride; //8;
- vpred_ptr += uv_stride; //8;
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
}
}
@@ -537,8 +538,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
vpred_ptr[j] = predv;
}
- upred_ptr += uv_stride; //8;
- vpred_ptr += uv_stride; //8;
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
}
}
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index d63aa15cb..988b43a77 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index d92d5c96a..db44fa190 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -55,7 +56,7 @@ void vp8_predict_intra4x4(BLOCKD *x,
break;
case B_TM_PRED:
{
- // prediction similar to true_motion prediction
+ /* prediction similar to true_motion prediction */
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
@@ -294,8 +295,9 @@ void vp8_predict_intra4x4(BLOCKD *x,
}
}
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
+/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+ * to the right prediction have filled in pixels to use.
+ */
void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
{
unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
@@ -317,6 +319,74 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
vp8_intra_prediction_down_copy(x);
+#if ARCH_ARM
+ {
+ BLOCKD *b = &x->block[0];
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ b += 1;
+
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+ }
+#else
for (i = 0; i < 16; i++)
{
BLOCKD *b = &x->block[i];
@@ -324,6 +394,7 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
vp8_predict_intra4x4(b, x->block[i].bmi.mode, x->block[i].predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
+#endif
vp8_recon_intra_mbuv(rtcd, x);
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index 788c8c40a..6ac2b7137 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h
deleted file mode 100644
index bb93533a3..000000000
--- a/vp8/common/segmentation_common.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "string.h"
-#include "blockd.h"
-#include "onyxc_int.h"
-
-extern void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd);
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index dcaafe6c6..7976e252b 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -15,22 +16,16 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
{
int i;
- // set up frame new frame for intra coded blocks
- vpx_memset(ybf->y_buffer - 1 - 2 * ybf->y_stride, 127, ybf->y_width + 5);
+ /* set up the new frame for intra coded blocks */
vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
-
for (i = 0; i < ybf->y_height; i++)
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
- vpx_memset(ybf->u_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
for (i = 0; i < ybf->uv_height; i++)
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
- vpx_memset(ybf->v_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
for (i = 0; i < ybf->uv_height; i++)
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
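The three per-plane blocks above share one pattern; a hedged C sketch that factors it out (helper name hypothetical):

    /* Initialize the intra-prediction border of one plane: the row above
       the image (including the top-left corner) becomes 127, the column
       to the left becomes 129. */
    static void setup_plane_border(unsigned char *buf, int stride,
                                   int width, int height)
    {
        int i;

        vpx_memset(buf - 1 - stride, 127, width + 5);

        for (i = 0; i < height; i++)
            buf[stride * i - 1] = (unsigned char) 129;
    }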
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 6ec79b29c..5264fd04b 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h
index fbd5f4daf..acdeec3bc 100644
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c
index afe6a885e..73656b3d7 100644
--- a/vp8/common/swapyv12buffer.c
+++ b/vp8/common/swapyv12buffer.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index caf9499d9..a6473ed92 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 1829b649c..db996987a 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index a45937b12..1756100a7 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -1,13 +1,14 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
-
+#include <stdlib.h>
void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
@@ -50,3 +51,80 @@ void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
colpos++;
}
}
+
+static void plot (const int x, const int y, unsigned char *image, const int pitch)
+{
+ image [x+y*pitch] ^= 255;
+}
+
+/* Bresenham line algorithm */
+void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
+{
+ int steep = abs(y1 - y0) > abs(x1 - x0);
+ int deltax, deltay;
+ int error, ystep, y, x;
+
+ if (steep)
+ {
+ int t;
+ t = x0;
+ x0 = y0;
+ y0 = t;
+
+ t = x1;
+ x1 = y1;
+ y1 = t;
+ }
+
+ if (x0 > x1)
+ {
+ int t;
+ t = x0;
+ x0 = x1;
+ x1 = t;
+
+ t = y0;
+ y0 = y1;
+ y1 = t;
+ }
+
+ deltax = x1 - x0;
+ deltay = abs(y1 - y0);
+ error = deltax / 2;
+
+ y = y0;
+
+ if (y0 < y1)
+ ystep = 1;
+ else
+ ystep = -1;
+
+ if (steep)
+ {
+ for (x = x0; x <= x1; x++)
+ {
+ plot(y,x, image, pitch);
+
+ error = error - deltay;
+ if (error < 0)
+ {
+ y = y + ystep;
+ error = error + deltax;
+ }
+ }
+ }
+ else
+ {
+ for (x = x0; x <= x1; x++)
+ {
+ plot(x,y, image, pitch);
+
+ error = error - deltay;
+ if (error < 0)
+ {
+ y = y + ystep;
+ error = error + deltax;
+ }
+ }
+ }
+}
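Since plot() XORs each pixel with 255, drawing the same segment twice erases it again, which suits temporary debug overlays. A hypothetical call site, with the frame pointer and its fields assumed (note the x0, x1, y0, y1 argument order):

    /* XOR a diagonal marker onto the luma plane; repeat the call to undo it. */
    vp8_blit_line(0, 15, 0, 15, frame->y_buffer, frame->y_stride);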
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index a02cb244b..1929f7c4f 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -1,17 +1,18 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef _PTHREAD_EMULATION
#define _PTHREAD_EMULATION
-#define VPXINFINITE 10000 //10second.
+#define VPXINFINITE 10000 /* 10 seconds */
/* Thread management macros */
#ifdef _WIN32
@@ -71,10 +72,11 @@
#define sem_wait(sem) (semaphore_wait(*sem) )
#define sem_post(sem) semaphore_signal(*sem)
#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
-#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#else
#include <unistd.h>
-#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#include <sched.h>
+#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#endif
/* Not Windows. Assume pthreads */
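One caveat: the nanosleep() variant parked in the comments sets tv_nsec = 1000*nms, which sleeps nms microseconds even though the macro argument is in milliseconds. If a real sleep were ever wanted instead of sched_yield(), a corrected sketch (helper name hypothetical) would be:

    #include <time.h>

    static void thread_sleep_ms(int nms)
    {
        struct timespec ts;

        ts.tv_sec  = nms / 1000;
        ts.tv_nsec = (long)(nms % 1000) * 1000000L;  /* ms -> ns */
        nanosleep(&ts, NULL);
    }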
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 4ad018d49..d80c64bdf 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -46,6 +47,12 @@ void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
tree2tok(p, t, 0, 0, 0);
}
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
+ int offset)
+{
+ tree2tok(p - offset, t, 0, 0, 0);
+}
+
static void branch_counts(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],
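The offset variant simply rebases the output array: tree2tok() stores the entry for token value v at p[v], so passing p - offset places value v at index v - offset. A hedged usage sketch for an alphabet whose smallest token value is BASE (BASE, ALPHABET_SIZE and my_tree all hypothetical):

    struct vp8_token_struct toks[ALPHABET_SIZE];

    /* Values BASE .. BASE+ALPHABET_SIZE-1 fill toks[0 .. ALPHABET_SIZE-1]. */
    vp8_tokens_from_tree_offset(toks, my_tree, BASE);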
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 0356d2b02..ebf51c5ed 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -1,17 +1,18 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __INC_TREECODER_H
#define __INC_TREECODER_H
-typedef unsigned char vp8bc_index_t; // probability index
+typedef unsigned char vp8bc_index_t; /* probability index */
typedef unsigned char vp8_prob;
@@ -53,6 +54,8 @@ typedef const struct vp8_token_struct
/* Construct encoding array from tree. */
void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
+ int offset);
/* Convert array of token occurrence counts into a table of probabilities
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index addd26469..22b531a76 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -63,32 +64,32 @@ typedef signed char INT8;
#endif
#ifndef TYPE_INT16
-//#define TYPE_INT16
+/*#define TYPE_INT16*/
typedef signed short INT16;
#endif
#ifndef TYPE_INT32
-//#define TYPE_INT32
+/*#define TYPE_INT32*/
typedef signed int INT32;
#endif
#ifndef TYPE_UINT8
-//#define TYPE_UINT8
+/*#define TYPE_UINT8*/
typedef unsigned char UINT8;
#endif
#ifndef TYPE_UINT32
-//#define TYPE_UINT32
+/*#define TYPE_UINT32*/
typedef unsigned int UINT32;
#endif
#ifndef TYPE_UINT16
-//#define TYPE_UINT16
+/*#define TYPE_UINT16*/
typedef unsigned short UINT16;
#endif
#ifndef TYPE_BOOL
-//#define TYPE_BOOL
+/*#define TYPE_BOOL*/
typedef int BOOL;
#endif
@@ -100,7 +101,7 @@ typedef __int64 INT64;
#ifndef TYPE_INT64
#ifdef _TMS320C6X
-//for now we only have 40bits
+/* for now we only have 40bits */
typedef long INT64;
#else
typedef long long INT64;
diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp
index e352e7a19..44869ecc7 100644
--- a/vp8/common/vfwsetting.hpp
+++ b/vp8/common/vfwsetting.hpp
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h
index 40608c6dd..a2fce65dc 100644
--- a/vp8/common/vpx_ref_build_prefix.h
+++ b/vp8/common/vpx_ref_build_prefix.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h
index d03e0bd02..a95d90574 100644
--- a/vp8/common/vpxblit.h
+++ b/vp8/common/vpxblit.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h
index a8e28f59a..4ee617f6c 100644
--- a/vp8/common/vpxblit_c64.h
+++ b/vp8/common/vpxblit_c64.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h
index e4c9f3ef3..b70f29673 100644
--- a/vp8/common/vpxerrors.h
+++ b/vp8/common/vpxerrors.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx
index 06faca69c..faddf1f42 100644
--- a/vp8/common/x86/boolcoder.cxx
+++ b/vp8/common/x86/boolcoder.cxx
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index 5dfb212e1..f6e568cdc 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -21,7 +22,7 @@
#if HAVE_MMX
extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
extern prototype_idct(vp8_short_idct4x4llm_mmx);
-extern prototype_idct_scalar(vp8_dc_only_idct_mmx);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
@@ -33,8 +34,8 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
-#undef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx
+#undef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 2751c6934..43735bc4b 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -57,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL] ;
+ pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -69,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -112,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL] ;
+ pmulhw mm5, [GLOBAL(x_s1sqr2)] ;
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL] ;
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ;
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -124,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -195,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx):
mov rax, arg(0) ;input
movd mm0, [rax]
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
mov rdx, arg(1) ;output
psraw mm0, 3
@@ -219,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
pop rbp
ret
-;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
-global sym(vp8_dc_only_idct_mmx)
-sym(vp8_dc_only_idct_mmx):
+;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+global sym(vp8_dc_only_idct_add_mmx)
+sym(vp8_dc_only_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 3
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
+ push rsi
+ push rdi
; end prolog
- movd mm0, arg(0) ;input_dc
+ mov rsi, arg(1) ;s -- prediction
+ mov rdi, arg(2) ;d -- destination
+ movsxd rax, dword ptr arg(4) ;stride
+ movsxd rdx, dword ptr arg(3) ;pitch
+ pxor mm0, mm0
- paddw mm0, [fours GLOBAL]
- mov rdx, arg(1) ;output
+ movd mm5, arg(0) ;input_dc
- psraw mm0, 3
- movsxd rax, dword ptr arg(2) ;pitch
+ paddw mm5, [GLOBAL(fours)]
- punpcklwd mm0, mm0
- punpckldq mm0, mm0
+ psraw mm5, 3
- movq [rdx], mm0
- movq [rdx+rax], mm0
+ punpcklwd mm5, mm5
+ punpckldq mm5, mm5
- movq [rdx+rax*2], mm0
- add rdx, rax
+ movd mm1, [rsi]
+ punpcklbw mm1, mm0
+ paddsw mm1, mm5
+ packuswb mm1, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi], mm1
- movq [rdx+rax*2], mm0
+ movd mm2, [rsi+rdx]
+ punpcklbw mm2, mm0
+ paddsw mm2, mm5
+ packuswb mm2, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+rax], mm2
+
+ movd mm3, [rsi+2*rdx]
+ punpcklbw mm3, mm0
+ paddsw mm3, mm5
+ packuswb mm3, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+2*rax], mm3
+
+ add rdi, rax
+ add rsi, rdx
+ movd mm4, [rsi+2*rdx]
+ punpcklbw mm4, mm0
+ paddsw mm4, mm5
+ packuswb mm4, mm0 ; pack back to bytes with unsigned saturation
+ movd [rdi+2*rax], mm4
; begin epilog
+ pop rdi
+ pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
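The rewritten routine folds the DC-only inverse transform into the reconstruction add. A C sketch of what the MMX code above computes, matching its paddw/psraw rounding and packuswb saturation (not part of the commit):

    static void dc_only_idct_add_sketch(short input_dc,
                                        const unsigned char *pred_ptr,
                                        unsigned char *dst_ptr,
                                        int pitch, int stride)
    {
        int a1 = (input_dc + 4) >> 3;  /* paddw fours ; psraw 3 */
        int r, c;

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int v = pred_ptr[c] + a1;

                dst_ptr[c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
            }

            pred_ptr += pitch;
            dst_ptr  += stride;
        }
    }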
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
new file mode 100644
index 000000000..edee1578e
--- /dev/null
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -0,0 +1,708 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void idct_dequant_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; int blk_stride - 5
+; )
+
+global sym(idct_dequant_0_2x_sse2)
+sym(idct_dequant_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ ; end prolog
+
+ mov rdx, arg(1) ; dequant
+ mov rax, arg(0) ; qcoeff
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ movd xmm4, [rax]
+ movd xmm5, [rdx]
+
+ pinsrw xmm4, [rax+32], 4
+ pinsrw xmm5, [rdx], 4
+
+ pmullw xmm4, xmm5
+
+ ; clear coeffs
+ movd [rax], xmm7
+ movd [rax+32], xmm7
+;pshufb
+ pshuflw xmm4, xmm4, 00000000b
+ pshufhw xmm4, xmm4, 00000000b
+
+ mov rax, arg(2) ; pre
+ paddw xmm4, [GLOBAL(fours)]
+
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+ psraw xmm4, 3
+
+ movq xmm0, [rax]
+ movq xmm1, [rax+rcx]
+ movq xmm2, [rax+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm3, [rax+rcx]
+
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpcklbw xmm3, xmm7
+
+ mov rax, arg(3) ; dst
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; store blocks back out
+ movq [rax], xmm0
+ movq [rax + rdx], xmm1
+
+ lea rax, [rax + 2*rdx]
+
+ movq [rax], xmm2
+ movq [rax + rdx], xmm3
+
+ ; begin epilog
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
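idct_dequant_0_2x_sse2 processes two adjacent blocks whose only surviving coefficient is the DC; per block it reduces to this C sketch (names hypothetical):

    static void idct_dequant_0_sketch(short *qcoeff, const short *dequant,
                                      const unsigned char *pre,
                                      unsigned char *dst,
                                      int dst_stride, int blk_stride)
    {
        int a1 = ((qcoeff[0] * dequant[0]) + 4) >> 3;
        int r, c;

        qcoeff[0] = 0;  /* coefficients are cleared, as in the asm */

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int v = pre[c] + a1;

                dst[c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
            }

            pre += blk_stride;
            dst += dst_stride;
        }
    }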
+global sym(idct_dequant_full_2x_sse2)
+sym(idct_dequant_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; full path: both blocks may carry nonzero AC coefficients, so the
+ ; whole qcoeff buffer is loaded and dequantized below
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensible data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ;
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)]
+
+ paddw xmm2, [GLOBAL(fours)]
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7
+
+ ; Load up predict blocks
+ movq xmm4, [rsi]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish:
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void idct_dequant_dc_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; short *dc - 5
+; )
+global sym(idct_dequant_dc_0_2x_sse2)
+sym(idct_dequant_dc_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; special case when 2 blocks have 0 or 1 coeffs
+ ; the dc values arrive pre-computed in arg(5), so qcoeff is not read
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ mov rdx, arg(5) ; dc
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ ; load the two 16-bit dc words at once (one doubleword)
+ movd xmm4, [rdx]
+
+ ; Load up predict blocks
+ movq xmm0, [rsi]
+ movq xmm1, [rsi+16]
+ movq xmm2, [rsi+32]
+ movq xmm3, [rsi+48]
+
+ ; Duplicate and expand dc across
+ punpcklwd xmm4, xmm4
+ punpckldq xmm4, xmm4
+
+ ; Rounding to dequant and downshift
+ paddw xmm4, [GLOBAL(fours)]
+ psraw xmm4, 3
+
+ ; Predict buffer needs to be expanded from bytes to words
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpcklbw xmm3, xmm7
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(idct_dequant_dc_full_2x_sse2)
+sym(idct_dequant_dc_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; full path with separate DC: the whole qcoeff buffer is dequantized
+ ; below, then the dc terms from arg(5) are inserted
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensible data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; DC component
+ mov rdx, arg(5)
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; insert DC component
+ pinsrw xmm0, [rdx], 0
+ pinsrw xmm0, [rdx+2], 4
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ;
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)]
+
+ paddw xmm2, [GLOBAL(fours)]
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7
+
+ ; Load up predict blocks
+ movq xmm4, [rsi]
+ movq xmm5, [rsi+16]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+32]
+ movq xmm5, [rsi+48]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish:
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+fours:
+ times 8 dw 0x0004
+align 16
+x_s1sqr2:
+ times 8 dw 0x8A8C
+align 16
+x_c1sqr2less1:
+ times 8 dw 0x4E7B
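For reference, pmulhw keeps the high 16 bits of each product, i.e. it multiplies by constant/65536, which decodes these tables as Q16 fixed-point factors of the 4x4 IDCT:

    ; x_s1sqr2      = 0x8A8C = 35468 ~= 65536 * sqrt(2)*sin(pi/8)        (~0.5412)
    ; x_c1sqr2less1 = 0x4E7B = 20091 ~= 65536 * (sqrt(2)*cos(pi/8) - 1)  (~0.3066)
    ; fours feeds the rounded final shift: paddw x, fours / psraw x, 3 == (x+4)>>3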
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 562e5908f..10b5274dc 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -68,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
movq mm2, [rsi + 16] ;ip[8]
movq mm3, [rsi + 24] ;ip[12]
- movd mm7, rax
+ movq mm7, rax
movq mm4, mm0
punpcklwd mm7, mm7 ;0003000300030003h
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 96943dfb8..83c97df7d 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -16,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
+ SAVE_XMM
push rsi
push rdi
; end prolog
@@ -100,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 6e4d2b651..c6c215c3c 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -110,7 +111,7 @@ next8_h:
psubusb mm3, mm2 ; q1-=p1
psubusb mm2, mm4 ; p1-=q1
por mm2, mm3 ; abs(p1-q1)
- pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm2, 1 ; abs(p1-q1)/2
movq mm6, mm5 ; p0
@@ -149,12 +150,12 @@ next8_h:
; start work on filters
movq mm2, [rsi+2*rax] ; p1
movq mm7, [rdi] ; q1
- pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm2, mm7 ; p1 - q1
pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
movq mm3, mm0 ; q0
psubsb mm0, mm6 ; q0 - p0
paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
@@ -162,8 +163,8 @@ next8_h:
paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
pand mm1, mm2 ; mask filter values we don't care about
movq mm2, mm1
- paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
- paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+ paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
pxor mm0, mm0 ;
pxor mm5, mm5
@@ -184,29 +185,29 @@ next8_h:
movq mm5, mm0 ; save results
packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw mm5, [ones GLOBAL]
- paddsw mm1, [ones GLOBAL]
+ paddsw mm5, [GLOBAL(ones)]
+ paddsw mm1, [GLOBAL(ones)]
psraw mm5, 1 ; partial shifted one more time for 2nd tap
psraw mm1, 1 ; partial shifted one more time for 2nd tap
packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
pandn mm4, mm5 ; high edge variance additive
paddsb mm6, mm2 ; p0+= p0 add
- pxor mm6, [t80 GLOBAL] ; unoffset
+ pxor mm6, [GLOBAL(t80)] ; unoffset
movq [rsi+rax], mm6 ; write back
movq mm6, [rsi+2*rax] ; p1
- pxor mm6, [t80 GLOBAL] ; reoffset
+ pxor mm6, [GLOBAL(t80)] ; reoffset
paddsb mm6, mm4 ; p1+= p1 add
- pxor mm6, [t80 GLOBAL] ; unoffset
+ pxor mm6, [GLOBAL(t80)] ; unoffset
movq [rsi+2*rax], mm6 ; write back
psubsb mm3, mm0 ; q0-= q0 add
- pxor mm3, [t80 GLOBAL] ; unoffset
+ pxor mm3, [GLOBAL(t80)] ; unoffset
movq [rsi], mm3 ; write back
psubsb mm7, mm4 ; q1-= q1 add
- pxor mm7, [t80 GLOBAL] ; unoffset
+ pxor mm7, [GLOBAL(t80)] ; unoffset
movq [rdi], mm7 ; write back
add rsi,8
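The t80/t3/t4/ones constants implement the normal VP8 loop-filter core on pixels biased into signed range. A C sketch of the per-pixel update the MMX code performs, with mask and hev being 0 or -1 per lane as computed above (clamp helper hypothetical; RFC 6386 gives the normative form):

    static signed char sclamp(int v)
    {
        return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    static void filter4_sketch(signed char mask, signed char hev,
                               unsigned char *op1, unsigned char *op0,
                               unsigned char *oq0, unsigned char *oq1)
    {
        signed char ps1 = (signed char)(*op1 ^ 0x80);  /* t80: bias to signed */
        signed char ps0 = (signed char)(*op0 ^ 0x80);
        signed char qs0 = (signed char)(*oq0 ^ 0x80);
        signed char qs1 = (signed char)(*oq1 ^ 0x80);
        signed char f, f1, f2;

        f  = (signed char)(sclamp(ps1 - qs1) & hev);         /* hvm(p1 - q1) */
        f  = (signed char)(sclamp(f + 3 * (qs0 - ps0)) & mask);

        f1 = (signed char)(sclamp(f + 4) >> 3);              /* t4 */
        f2 = (signed char)(sclamp(f + 3) >> 3);              /* t3 */

        qs0 = sclamp(qs0 - f1);
        ps0 = sclamp(ps0 + f2);

        f   = (signed char)(((f1 + 1) >> 1) & ~hev);         /* ones: 2nd tap */
        qs1 = sclamp(qs1 - f);
        ps1 = sclamp(ps1 + f);

        *op1 = (unsigned char)(ps1 ^ 0x80);                  /* unoffset */
        *op0 = (unsigned char)(ps0 ^ 0x80);
        *oq0 = (unsigned char)(qs0 ^ 0x80);
        *oq1 = (unsigned char)(qs1 ^ 0x80);
    }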
@@ -402,7 +403,7 @@ next8_v:
psubusb mm5, mm1 ; q1-=p1
psubusb mm1, mm2 ; p1-=q1
por mm5, mm1 ; abs(p1-q1)
- pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
@@ -454,14 +455,14 @@ next8_v:
movq mm6, [rdx+8] ; p0
movq mm0, [rdx+16] ; q0
- pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm2, mm7 ; p1 - q1
pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
movq mm3, mm0 ; q0
psubsb mm0, mm6 ; q0 - p0
@@ -473,9 +474,9 @@ next8_v:
pand mm1, mm2 ; mask filter values we don't care about
movq mm2, mm1
- paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
- paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+ paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
pxor mm0, mm0 ;
pxor mm5, mm5
@@ -502,9 +503,9 @@ next8_v:
movq mm5, mm0 ; save results
packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw mm5, [ones GLOBAL]
+ paddsw mm5, [GLOBAL(ones)]
- paddsw mm1, [ones GLOBAL]
+ paddsw mm1, [GLOBAL(ones)]
psraw mm5, 1 ; partial shifted one more time for 2nd tap
psraw mm1, 1 ; partial shifted one more time for 2nd tap
@@ -513,22 +514,22 @@ next8_v:
pandn mm4, mm5 ; high edge variance additive
paddsb mm6, mm2 ; p0+= p0 add
- pxor mm6, [t80 GLOBAL] ; unoffset
+ pxor mm6, [GLOBAL(t80)] ; unoffset
; mm6=p0 ;
movq mm1, [rdx] ; p1
- pxor mm1, [t80 GLOBAL] ; reoffset
+ pxor mm1, [GLOBAL(t80)] ; reoffset
paddsb mm1, mm4 ; p1+= p1 add
- pxor mm1, [t80 GLOBAL] ; unoffset
+ pxor mm1, [GLOBAL(t80)] ; unoffset
; mm6 = p0 mm1 = p1
psubsb mm3, mm0 ; q0-= q0 add
- pxor mm3, [t80 GLOBAL] ; unoffset
+ pxor mm3, [GLOBAL(t80)] ; unoffset
; mm3 = q0
psubsb mm7, mm4 ; q1-= q1 add
- pxor mm7, [t80 GLOBAL] ; unoffset
+ pxor mm7, [GLOBAL(t80)] ; unoffset
; mm7 = q1
        ; transpose and write back
@@ -707,7 +708,7 @@ next8_mbh:
psubusb mm3, mm2 ; q1-=p1
psubusb mm2, mm4 ; p1-=q1
por mm2, mm3 ; abs(p1-q1)
- pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm2, 1 ; abs(p1-q1)/2
movq mm6, mm5 ; p0
@@ -752,12 +753,12 @@ next8_mbh:
; start work on filters
movq mm2, [rsi+2*rax] ; p1
movq mm7, [rdi] ; q1
- pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm2, mm7 ; p1 - q1
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
movq mm3, mm0 ; q0
psubsb mm0, mm6 ; q0 - p0
paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1)
@@ -771,7 +772,7 @@ next8_mbh:
pand mm2, mm4; ; Filter2 = vp8_filter & hev
movq mm5, mm2 ;
- paddsb mm5, [t3 GLOBAL];
+ paddsb mm5, [GLOBAL(t3)];
pxor mm0, mm0 ; 0
pxor mm7, mm7 ; 0
@@ -784,7 +785,7 @@ next8_mbh:
movq mm5, mm0 ; Filter2
- paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
+ paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
pxor mm0, mm0 ; 0
pxor mm7, mm7 ; 0
@@ -817,10 +818,10 @@ next8_mbh:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s27 GLOBAL]
- pmulhw mm2, [s27 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s27)]
+ pmulhw mm2, [GLOBAL(s27)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
@@ -828,8 +829,8 @@ next8_mbh:
psubsb mm3, mm1
paddsb mm6, mm1
- pxor mm3, [t80 GLOBAL]
- pxor mm6, [t80 GLOBAL]
+ pxor mm3, [GLOBAL(t80)]
+ pxor mm6, [GLOBAL(t80)]
movq [rsi+rax], mm6
movq [rsi], mm3
@@ -843,10 +844,10 @@ next8_mbh:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s18 GLOBAL]
- pmulhw mm2, [s18 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s18)]
+ pmulhw mm2, [GLOBAL(s18)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
@@ -854,14 +855,14 @@ next8_mbh:
movq mm3, [rdi]
movq mm6, [rsi+rax*2] ; p1
- pxor mm3, [t80 GLOBAL]
- pxor mm6, [t80 GLOBAL]
+ pxor mm3, [GLOBAL(t80)]
+ pxor mm6, [GLOBAL(t80)]
paddsb mm6, mm1
psubsb mm3, mm1
- pxor mm6, [t80 GLOBAL]
- pxor mm3, [t80 GLOBAL]
+ pxor mm6, [GLOBAL(t80)]
+ pxor mm3, [GLOBAL(t80)]
movq [rdi], mm3
movq [rsi+rax*2], mm6
@@ -875,10 +876,10 @@ next8_mbh:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s9 GLOBAL]
- pmulhw mm2, [s9 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s9)]
+ pmulhw mm2, [GLOBAL(s9)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
@@ -888,14 +889,14 @@ next8_mbh:
neg rax
movq mm3, [rdi+rax ]
- pxor mm6, [t80 GLOBAL]
- pxor mm3, [t80 GLOBAL]
+ pxor mm6, [GLOBAL(t80)]
+ pxor mm3, [GLOBAL(t80)]
paddsb mm6, mm1
psubsb mm3, mm1
- pxor mm6, [t80 GLOBAL]
- pxor mm3, [t80 GLOBAL]
+ pxor mm6, [GLOBAL(t80)]
+ pxor mm3, [GLOBAL(t80)]
movq [rdi+rax ], mm3
neg rax
movq [rdi+rax*4], mm6
@@ -1104,7 +1105,7 @@ next8_mbv:
psubusb mm5, mm1 ; q1-=p1
psubusb mm1, mm2 ; p1-=q1
por mm5, mm1 ; abs(p1-q1)
- pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
@@ -1154,14 +1155,14 @@ next8_mbv:
; start work on filters
movq mm2, [rdx+16] ; p1
movq mm7, [rdx+40] ; q1
- pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm2, mm7 ; p1 - q1
movq mm6, [rdx+24] ; p0
movq mm0, [rdx+32] ; q0
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
movq mm3, mm0 ; q0
psubsb mm0, mm6 ; q0 - p0
@@ -1175,7 +1176,7 @@ next8_mbv:
pand mm2, mm4; ; Filter2 = vp8_filter & hev
movq mm5, mm2 ;
- paddsb mm5, [t3 GLOBAL];
+ paddsb mm5, [GLOBAL(t3)];
pxor mm0, mm0 ; 0
pxor mm7, mm7 ; 0
@@ -1188,7 +1189,7 @@ next8_mbv:
movq mm5, mm0 ; Filter2
- paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
+ paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
pxor mm0, mm0 ; 0
pxor mm7, mm7 ; 0
@@ -1221,10 +1222,10 @@ next8_mbv:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s27 GLOBAL]
- pmulhw mm2, [s27 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s27)]
+ pmulhw mm2, [GLOBAL(s27)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
@@ -1232,8 +1233,8 @@ next8_mbv:
psubsb mm3, mm1
paddsb mm6, mm1
- pxor mm3, [t80 GLOBAL]
- pxor mm6, [t80 GLOBAL]
+ pxor mm3, [GLOBAL(t80)]
+ pxor mm6, [GLOBAL(t80)]
movq [rdx+24], mm6
movq [rdx+32], mm3
@@ -1247,24 +1248,24 @@ next8_mbv:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s18 GLOBAL]
- pmulhw mm2, [s18 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s18)]
+ pmulhw mm2, [GLOBAL(s18)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
movq mm3, [rdx + 40]
movq mm6, [rdx + 16] ; p1
- pxor mm3, [t80 GLOBAL]
- pxor mm6, [t80 GLOBAL]
+ pxor mm3, [GLOBAL(t80)]
+ pxor mm6, [GLOBAL(t80)]
paddsb mm6, mm1
psubsb mm3, mm1
- pxor mm6, [t80 GLOBAL]
- pxor mm3, [t80 GLOBAL]
+ pxor mm6, [GLOBAL(t80)]
+ pxor mm3, [GLOBAL(t80)]
movq [rdx + 40], mm3
movq [rdx + 16], mm6
@@ -1278,10 +1279,10 @@ next8_mbv:
pxor mm2, mm2
punpcklbw mm1, mm4
punpckhbw mm2, mm4
- pmulhw mm1, [s9 GLOBAL]
- pmulhw mm2, [s9 GLOBAL]
- paddw mm1, [s63 GLOBAL]
- paddw mm2, [s63 GLOBAL]
+ pmulhw mm1, [GLOBAL(s9)]
+ pmulhw mm2, [GLOBAL(s9)]
+ paddw mm1, [GLOBAL(s63)]
+ paddw mm2, [GLOBAL(s63)]
psraw mm1, 7
psraw mm2, 7
packsswb mm1, mm2
@@ -1289,14 +1290,14 @@ next8_mbv:
movq mm6, [rdx+ 8]
movq mm3, [rdx+48]
- pxor mm6, [t80 GLOBAL]
- pxor mm3, [t80 GLOBAL]
+ pxor mm6, [GLOBAL(t80)]
+ pxor mm3, [GLOBAL(t80)]
paddsb mm6, mm1
psubsb mm3, mm1
- pxor mm6, [t80 GLOBAL] ; mm6 = 71 61 51 41 31 21 11 01
- pxor mm3, [t80 GLOBAL] ; mm3 = 76 66 56 46 36 26 15 06
+ pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01
+ pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 16 06
        ; transpose and write back
movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00
@@ -1431,7 +1432,7 @@ nexts8_h:
psubusb mm0, mm1 ; q1-=p1
psubusb mm1, mm4 ; p1-=q1
por mm1, mm0 ; abs(p1-q1)
- pand mm1, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm1, 1 ; abs(p1-q1)/2
movq mm5, [rsi+rax] ; p0
@@ -1449,12 +1450,12 @@ nexts8_h:
pcmpeqb mm5, mm3
; start work on filters
- pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm2, mm7 ; p1 - q1
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor mm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
movq mm3, mm0 ; q0
psubsb mm0, mm6 ; q0 - p0
paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0)
@@ -1463,7 +1464,7 @@ nexts8_h:
pand mm5, mm2 ; mask filter values we don't care about
; do + 4 side
- paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
+ paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movq mm0, mm5 ; get a copy of filters
psllw mm0, 8 ; shift left 8
@@ -1476,12 +1477,12 @@ nexts8_h:
por mm0, mm1 ; put the two together to get result
psubsb mm3, mm0 ; q0-= q0 add
- pxor mm3, [t80 GLOBAL] ; unoffset
+ pxor mm3, [GLOBAL(t80)] ; unoffset
movq [rsi], mm3 ; write back
; now do +3 side
- psubsb mm5, [t1s GLOBAL] ; +3 instead of +4
+ psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
movq mm0, mm5 ; get a copy of filters
psllw mm0, 8 ; shift left 8
@@ -1493,7 +1494,7 @@ nexts8_h:
paddsb mm6, mm0 ; p0+= p0 add
- pxor mm6, [t80 GLOBAL] ; unoffset
+ pxor mm6, [GLOBAL(t80)] ; unoffset
movq [rsi+rax], mm6 ; write back
add rsi,8
@@ -1588,7 +1589,7 @@ nexts8_v:
psubusb mm7, mm6 ; q1-=p1
psubusb mm6, mm3 ; p1-=q1
por mm6, mm7 ; abs(p1-q1)
- pand mm6, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm6, 1 ; abs(p1-q1)/2
movq mm5, mm1 ; p0
@@ -1616,16 +1617,16 @@ nexts8_v:
movq t0, mm0
movq t1, mm3
- pxor mm0, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor mm3, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb mm0, mm3 ; p1 - q1
movq mm6, mm1 ; p0
movq mm7, mm2 ; q0
- pxor mm6, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor mm7, [t80 GLOBAL] ; offset to convert to signed values
+ pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values
        movq mm3, mm7 ; offset q0
psubsb mm7, mm6 ; q0 - p0
@@ -1636,7 +1637,7 @@ nexts8_v:
pand mm5, mm0 ; mask filter values we don't care about
- paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
+ paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movq mm0, mm5 ; get a copy of filters
psllw mm0, 8 ; shift left 8
@@ -1650,10 +1651,10 @@ nexts8_v:
por mm0, mm7 ; put the two together to get result
psubsb mm3, mm0 ; q0-= q0sz add
- pxor mm3, [t80 GLOBAL] ; unoffset
+ pxor mm3, [GLOBAL(t80)] ; unoffset
; now do +3 side
- psubsb mm5, [t1s GLOBAL] ; +3 instead of +4
+ psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4
movq mm0, mm5 ; get a copy of filters
psllw mm0, 8 ; shift left 8
@@ -1665,7 +1666,7 @@ nexts8_v:
por mm0, mm5 ; put the two together to get result
paddsb mm6, mm0 ; p0+= p0 add
- pxor mm6, [t80 GLOBAL] ; unoffset
+ pxor mm6, [GLOBAL(t80)] ; unoffset
movq mm0, t0
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 5275dfa3b..849133dc4 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1,662 +1,330 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
+; Use of pmaxub instead of psubusb to compute the filter mask was
+; seen in ffvp8
+
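+; Old scheme, once per pixel pair:
+;     psubusb xmmN, limit        ; nonzero where abs diff > limit
+;     por     mask, xmmN         ; OR the per-pair results
+; pmaxub scheme:
+;     pmaxub  xmm1, abs_diff     ; keep only the largest abs diff
+; so a single psubusb against the limit plus one pcmpeqb against
+; zero at the end builds the same breakout mask with fewer ops.
+; (xmmN, mask and abs_diff above are illustrative names only.)
+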
+%macro LFH_FILTER_AND_HEV_MASK 1
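+; %1 == 1: y plane, one aligned 16-byte row per register.
+; %1 == 0: u and v planes; eight u pixels (movlps) and eight v
+; pixels (movhps) are packed into each register, and q2/q1 are
+; spilled to the stack for reuse by the filter macros.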
+%if %1
+ movdqa xmm2, [rdi+2*rax] ; q3
+ movdqa xmm1, [rsi+2*rax] ; q2
+ movdqa xmm4, [rsi+rax] ; q1
+ movdqa xmm5, [rsi] ; q0
+ neg rax ; negate pitch to deal with above border
+%else
+ movlps xmm2, [rsi + rcx*2] ; q3
+ movlps xmm1, [rsi + rcx] ; q2
+ movlps xmm4, [rsi] ; q1
+ movlps xmm5, [rsi + rax] ; q0
+
+ movhps xmm2, [rdi + rcx*2]
+ movhps xmm1, [rdi + rcx]
+ movhps xmm4, [rdi]
+ movhps xmm5, [rdi + rax]
+
+ lea rsi, [rsi + rax*4]
+ lea rdi, [rdi + rax*4]
+
+ movdqa XMMWORD PTR [rsp], xmm1 ; store q2
+ movdqa XMMWORD PTR [rsp + 16], xmm4 ; store q1
+%endif
-;void vp8_loop_filter_horizontal_edge_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_loop_filter_horizontal_edge_sse2)
-sym(vp8_loop_filter_horizontal_edge_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 32 ; reserve 32 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- mov rdx, arg(3) ;limit
- movdqa xmm7, XMMWORD PTR [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
-
- ; calculate breakout conditions
- movdqu xmm2, [rdi+2*rax] ; q3
- movdqu xmm1, [rsi+2*rax] ; q2
- movdqa xmm6, xmm1 ; q2
- psubusb xmm1, xmm2 ; q2-=q3
- psubusb xmm2, xmm6 ; q3-=q2
- por xmm1, xmm2 ; abs(q3-q2)
- psubusb xmm1, xmm7 ;
-
-
- movdqu xmm4, [rsi+rax] ; q1
- movdqa xmm3, xmm4 ; q1
- psubusb xmm4, xmm6 ; q1-=q2
- psubusb xmm6, xmm3 ; q2-=q1
- por xmm4, xmm6 ; abs(q2-q1)
-
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- movdqu xmm4, [rsi] ; q0
- movdqa xmm0, xmm4 ; q0
- psubusb xmm4, xmm3 ; q0-=q1
- psubusb xmm3, xmm0 ; q1-=q0
- por xmm4, xmm3 ; abs(q0-q1)
- movdqa t0, xmm4 ; save to t0
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- neg rax ; negate pitch to deal with above border
- movdqu xmm2, [rsi+4*rax] ; p3
- movdqu xmm4, [rdi+4*rax] ; p2
- movdqa xmm5, xmm4 ; p2
- psubusb xmm4, xmm2 ; p2-=p3
- psubusb xmm2, xmm5 ; p3-=p2
- por xmm4, xmm2 ; abs(p3 - p2)
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
-
- movdqu xmm4, [rsi+2*rax] ; p1
- movdqa xmm3, xmm4 ; p1
- psubusb xmm4, xmm5 ; p1-=p2
- psubusb xmm5, xmm3 ; p2-=p1
- por xmm4, xmm5 ; abs(p2 - p1)
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- movdqa xmm2, xmm3 ; p1
-
- movdqu xmm4, [rsi+rax] ; p0
- movdqa xmm5, xmm4 ; p0
- psubusb xmm4, xmm3 ; p0-=p1
- psubusb xmm3, xmm5 ; p1-=p0
- por xmm4, xmm3 ; abs(p1 - p0)
- movdqa t1, xmm4 ; save to t1
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- movdqu xmm3, [rdi] ; q1
- movdqa xmm4, xmm3 ; q1
- psubusb xmm3, xmm2 ; q1-=p1
- psubusb xmm2, xmm4 ; p1-=q1
- por xmm2, xmm3 ; abs(p1-q1)
- pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero
- psrlw xmm2, 1 ; abs(p1-q1)/2
-
- movdqa xmm6, xmm5 ; p0
- movdqu xmm3, [rsi] ; q0
- psubusb xmm5, xmm3 ; p0-=q0
- psubusb xmm3, xmm6 ; q0-=p0
- por xmm5, xmm3 ; abs(p0 - q0)
- paddusb xmm5, xmm5 ; abs(p0-q0)*2
- paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
- mov rdx, arg(2) ;flimit ; get flimit
- movdqa xmm2, [rdx] ;
-
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
- paddb xmm7, xmm2 ; flimit * 2 + limit (less than 255)
-
- psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
- por xmm1, xmm5
- pxor xmm5, xmm5
- pcmpeqb xmm1, xmm5 ; mask mm1
-
-
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movdqa xmm7, [rdx] ;
- movdqa xmm4, t0 ; get abs (q1 - q0)
- psubusb xmm4, xmm7
- movdqa xmm3, t1 ; get abs (p1 - p0)
- psubusb xmm3, xmm7
- paddb xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb xmm4, xmm5
- pcmpeqb xmm5, xmm5
- pxor xmm4, xmm5
-
-
- ; start work on filters
- movdqu xmm2, [rsi+2*rax] ; p1
- movdqu xmm7, [rdi] ; q1
- pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
- psubsb xmm2, xmm7 ; p1 - q1
- pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
- movdqa xmm3, xmm0 ; q0
- psubsb xmm0, xmm6 ; q0 - p0
- paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
- paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
- paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
- pand xmm1, xmm2 ; mask filter values we don't care about
- movdqa xmm2, xmm1
- paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
- paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-
- pxor xmm0, xmm0 ;
- pxor xmm5, xmm5
- punpcklbw xmm0, xmm2 ;
- punpckhbw xmm5, xmm2 ;
- psraw xmm0, 11 ;
- psraw xmm5, 11
- packsswb xmm0, xmm5
- movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
- pxor xmm0, xmm0 ; 0
- movdqa xmm5, xmm1 ; abcdefgh
- punpcklbw xmm0, xmm1 ; e0f0g0h0
- psraw xmm0, 11 ; sign extended shift right by 3
- pxor xmm1, xmm1 ; 0
- punpckhbw xmm1, xmm5 ; a0b0c0d0
- psraw xmm1, 11 ; sign extended shift right by 3
- movdqa xmm5, xmm0 ; save results
-
- packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw xmm5, [ones GLOBAL]
- paddsw xmm1, [ones GLOBAL]
- psraw xmm5, 1 ; partial shifted one more time for 2nd tap
- psraw xmm1, 1 ; partial shifted one more time for 2nd tap
- packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
- pandn xmm4, xmm5 ; high edge variance additive
-
- paddsb xmm6, xmm2 ; p0+= p0 add
- pxor xmm6, [t80 GLOBAL] ; unoffset
- movdqu [rsi+rax], xmm6 ; write back
-
- movdqu xmm6, [rsi+2*rax] ; p1
- pxor xmm6, [t80 GLOBAL] ; reoffset
- paddsb xmm6, xmm4 ; p1+= p1 add
- pxor xmm6, [t80 GLOBAL] ; unoffset
- movdqu [rsi+2*rax], xmm6 ; write back
-
- psubsb xmm3, xmm0 ; q0-= q0 add
- pxor xmm3, [t80 GLOBAL] ; unoffset
- movdqu [rsi], xmm3 ; write back
-
- psubsb xmm7, xmm4 ; q1-= q1 add
- pxor xmm7, [t80 GLOBAL] ; unoffset
- movdqu [rdi], xmm7 ; write back
-
- add rsp, 32
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_loop_filter_vertical_edge_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *flimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vp8_loop_filter_vertical_edge_sse2)
-sym(vp8_loop_filter_vertical_edge_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 96 ; reserve 96 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- lea rsi, [rsi + rax*4 - 4]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
-
- add rdi, rax
- lea rcx, [rdi + rax *8]
-
- ;transpose
- movq xmm7, QWORD PTR [rsi+2*rax] ; 67 66 65 64 63 62 61 60
- movq xmm6, QWORD PTR [rdi+2*rax] ; 77 76 75 74 73 72 71 70
-
- punpcklbw xmm7, xmm6 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
- movq xmm5, QWORD PTR [rsi] ; 47 46 45 44 43 42 41 40
-
- movq xmm4, QWORD PTR [rsi+rax] ; 57 56 55 54 53 52 51 50
- punpcklbw xmm5, xmm4 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-
- movdqa xmm3, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
- punpckhwd xmm5, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-
- lea rsi, [rsi+ rax*8]
-
- punpcklwd xmm3, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
- movq xmm6, QWORD PTR [rsi + 2*rax] ; e7 e6 e5 e4 e3 e2 e1 e0
-
- movq xmm7, QWORD PTR [rcx + 2*rax] ; f7 f6 f5 f4 f3 f2 f1 f0
- punpcklbw xmm6, xmm7 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-
- movq xmm4, QWORD PTR [rsi] ; c7 c6 c5 c4 c3 c2 c1 c0
- movq xmm7, QWORD PTR [rsi + rax] ; d7 d6 d5 d4 d3 d2 d1 d0
-
- punpcklbw xmm4, xmm7 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
- movdqa xmm7, xmm4 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
-
- punpckhwd xmm7, xmm6 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
- punpcklwd xmm4, xmm6 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-
- ; xmm3 xmm4, xmm5 xmm7 in use
- neg rax
-
- lea rsi, [rsi+rax*8]
- movq xmm6, QWORD PTR [rsi+rax*2] ; 27 26 25 24 23 22 21 20
-
- movq xmm1, QWORD PTR [rsi+rax ] ; 37 36 35 34 33 32 31 30
- punpcklbw xmm6, xmm1 ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20
-
- movq xmm2, QWORD PTR [rsi+rax*4] ; 07 06 05 04 03 02 01 00
- movq xmm1, QWORD PTR [rdi+rax*4] ; 17 16 15 14 13 12 11 10
-
- punpcklbw xmm2, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
- movdqa xmm0, xmm2
-
- punpckhwd xmm2, xmm6 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
- punpcklwd xmm0, xmm6 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-
- movdqa xmm6, xmm2
- punpckldq xmm2, xmm5 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-
- punpckhdq xmm6, xmm5 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
- ;xmm0 xmm2 xmm3 xmm4, xmm6, xmm7
-
- movdqa xmm5, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
- punpckhdq xmm5, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
- punpckldq xmm0, xmm3 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
- lea rsi, [rcx+rax]
- ; xmm1, xmm3 free
- movq xmm1, QWORD PTR [rsi+rax*2] ; a7 a6 a5 a4 a3 a2 a1 a0
- movq xmm3, QWORD PTR [rsi+rax] ; b7 b6 b5 b4 b3 b2 b1 b0
-
- punpcklbw xmm1, xmm3 ;
- lea rdx, srct ;
-
- movdqa [rdx+16], xmm1 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
- movq xmm3, QWORD PTR [rsi+rax*4] ; 87 86 85 84 83 82 81 80
-
- movq xmm1, QWORD PTR [rcx+rax*4]
- punpcklbw xmm3, xmm1 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
- movdqa [rdx], xmm3 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
- punpckhwd xmm3, [rdx+16] ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
- movdqa xmm1, xmm3 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
- punpckhdq xmm1, xmm7 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
- punpckldq xmm3, xmm7 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-
- movdqa xmm7, xmm2 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
- punpcklqdq xmm7, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-
- punpckhqdq xmm2, xmm3 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
- movdqa [rdx+32], xmm7 ; save 4s
-
- movdqa [rdx+48], xmm2 ; save 5s
- movdqa xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-
- punpckhqdq xmm7, xmm1 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 = q3
- punpcklqdq xmm6, xmm1 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 = q2
-
- ; free 1, 3 xmm7-7s xmm6-6s, xmm2-5s
- movq xmm1, QWORD PTR [rdx] ; 93 83 92 82 91 81 90 80
- movq xmm3, QWORD PTR [rdx+16] ; b3 a3 b2 a2 b1 a1 b0 a0
-
- punpcklwd xmm1, xmm3 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
- movdqa xmm3, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-
- punpckhdq xmm3, xmm4 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
- punpckldq xmm1, xmm4 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-
- movdqa xmm4, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
- punpcklqdq xmm5, xmm3 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
- punpckhqdq xmm4, xmm3 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
- movdqa [rdx], xmm5 ; save 2s
-
- movdqa [rdx+16], xmm4 ; save 3s
-
- movdqa xmm3, xmm6 ;
- psubusb xmm3, xmm7 ; q3 - q2
-
- psubusb xmm7, xmm6 ; q2 - q3
- por xmm7, xmm3 ; abs(q3-q2)
-
- movdqa xmm3, xmm2 ; q1
- psubusb xmm3, xmm6 ; q1 - q2
-
- psubusb xmm6, xmm2 ; q2 - q1
- por xmm6, xmm3 ; abs(q2-q1)
-
-
- movdqa xmm3, xmm0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
- punpcklqdq xmm0, xmm1 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
- punpckhqdq xmm3, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
- movdqa xmm1, xmm3
-
- psubusb xmm3, xmm0 ; p2-p3
- psubusb xmm0, xmm1 ; p3-p2
-
- por xmm0, xmm3 ; abs(p3-p2)
- movdqa xmm3, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
- psubusb xmm3, xmm1 ; p1-p2
- psubusb xmm1, xmm5 ; p2-p1
-
- por xmm1, xmm3 ; abs(p1-p2)
- mov rdx, arg(3) ;limit
-
- movdqa xmm3, [rdx] ; limit
-
- psubusb xmm7, xmm3
- psubusb xmm0, xmm3
-
- psubusb xmm1, xmm3
- psubusb xmm6, xmm3
-
- por xmm7, xmm6
- por xmm0, xmm1
-
- por xmm0, xmm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
- movdqa xmm1, xmm5 ; p1
-
- movdqa xmm7, xmm4 ; xmm4 xmm7 = p0
-
- psubusb xmm7, xmm5 ; p0 - p1
- psubusb xmm5, xmm4 ; p1 - p0
-
- por xmm5, xmm7 ; abs(p1-p0)
- movdqa t0, xmm5 ; save abs(p1-p0)
-
- lea rdx, srct
- psubusb xmm5, xmm3
-
- por xmm0, xmm5 ; xmm0=mask
- movdqa xmm5, [rdx+32] ; xmm5=q0
-
- movdqa xmm7, [rdx+48] ; xmm7=q1
- movdqa xmm6, xmm5 ; mm6=q0
-
- movdqa xmm2, xmm7 ; q1
-
- psubusb xmm5, xmm7 ; q0-q1
- psubusb xmm7, xmm6 ; q1-q0
-
- por xmm7, xmm5 ; abs(q1-q0)
- movdqa t1, xmm7 ; save abs(q1-q0)
-
- psubusb xmm7, xmm3
- por xmm0, xmm7 ; mask
-
- movdqa xmm5, xmm2 ; q1
- psubusb xmm5, xmm1 ; q1-=p1
- psubusb xmm1, xmm2 ; p1-=q1
- por xmm5, xmm1 ; abs(p1-q1)
- pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero
- psrlw xmm5, 1 ; abs(p1-q1)/2
+ movdqa xmm6, xmm1 ; q2
+ movdqa xmm3, xmm4 ; q1
- mov rdx, arg(2) ;flimit ;
- movdqa xmm2, [rdx] ;flimit xmm2
+ psubusb xmm1, xmm2 ; q2-=q3
+ psubusb xmm2, xmm6 ; q3-=q2
- movdqa xmm1, xmm4 ; xmm1=xmm4=p0
+ psubusb xmm4, xmm6 ; q1-=q2
+ psubusb xmm6, xmm3 ; q2-=q1
- movdqa xmm7, xmm6 ; xmm7=xmm6=q0
- psubusb xmm1, xmm7 ; p0-q0
+ por xmm4, xmm6 ; abs(q2-q1)
+ por xmm1, xmm2 ; abs(q3-q2)
- psubusb xmm7, xmm4 ; q0-p0
- por xmm1, xmm7 ; abs(q0-p0)
- paddusb xmm1, xmm1 ; abs(q0-p0)*2
- paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+ movdqa xmm0, xmm5 ; q0
+ pmaxub xmm1, xmm4
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
- paddb xmm3, xmm2 ; flimit * 2 + limit (less than 255)
+ psubusb xmm5, xmm3 ; q0-=q1
+ psubusb xmm3, xmm0 ; q1-=q0
- psubusb xmm1, xmm3 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ por xmm5, xmm3 ; abs(q0-q1)
+ movdqa t0, xmm5 ; save to t0
- por xmm1, xmm0; ; mask
+ pmaxub xmm1, xmm5
- pxor xmm0, xmm0
- pcmpeqb xmm1, xmm0
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movdqa xmm7, [rdx]
+%if %1
+ movdqa xmm2, [rsi+4*rax] ; p3
+ movdqa xmm4, [rdi+4*rax] ; p2
+ movdqa xmm6, [rsi+2*rax] ; p1
+%else
+ movlps xmm2, [rsi + rax] ; p3
+ movlps xmm4, [rsi] ; p2
+ movlps xmm6, [rsi + rcx] ; p1
- ;
- movdqa xmm4, t0 ; get abs (q1 - q0)
- psubusb xmm4, xmm7
+ movhps xmm2, [rdi + rax]
+ movhps xmm4, [rdi]
+ movhps xmm6, [rdi + rcx]
- movdqa xmm3, t1 ; get abs (p1 - p0)
- psubusb xmm3, xmm7
+ movdqa XMMWORD PTR [rsp + 32], xmm4 ; store p2
+ movdqa XMMWORD PTR [rsp + 48], xmm6 ; store p1
+%endif
- por xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb xmm4, xmm0
+ movdqa xmm5, xmm4 ; p2
+ movdqa xmm3, xmm6 ; p1
- pcmpeqb xmm0, xmm0
- pxor xmm4, xmm0
+ psubusb xmm4, xmm2 ; p2-=p3
+ psubusb xmm2, xmm5 ; p3-=p2
- ; start work on filters
- lea rdx, srct
+ psubusb xmm3, xmm5 ; p1-=p2
+ pmaxub xmm1, xmm4 ; abs(p3 - p2)
- movdqa xmm2, [rdx] ; p1
- movdqa xmm7, [rdx+48] ; q1
+ psubusb xmm5, xmm6 ; p2-=p1
+ pmaxub xmm1, xmm2 ; abs(p3 - p2)
- movdqa xmm6, [rdx+16] ; p0
- movdqa xmm0, [rdx+32] ; q0
+ pmaxub xmm1, xmm5 ; abs(p2 - p1)
+ movdqa xmm2, xmm6 ; p1
- pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pmaxub xmm1, xmm3 ; abs(p2 - p1)
+%if %1
+ movdqa xmm4, [rsi+rax] ; p0
+ movdqa xmm3, [rdi] ; q1
+%else
+ movlps xmm4, [rsi + rcx*2] ; p0
+ movhps xmm4, [rdi + rcx*2]
+ movdqa xmm3, q1 ; q1
+%endif
- psubsb xmm2, xmm7 ; p1 - q1
- pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
+ movdqa xmm5, xmm4 ; p0
+ psubusb xmm4, xmm6 ; p0-=p1
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
+ psubusb xmm6, xmm5 ; p1-=p0
- movdqa xmm3, xmm0 ; q0
- psubsb xmm0, xmm6 ; q0 - p0
+ por xmm6, xmm4 ; abs(p1 - p0)
+ mov rdx, arg(2) ; get flimit
- paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
- paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
+ movdqa t1, xmm6 ; save to t1
- paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
- pand xmm1, xmm2 ; mask filter values we don't care about
+ movdqa xmm4, xmm3 ; q1
+ pmaxub xmm1, xmm6
- movdqa xmm2, xmm1
- paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ psubusb xmm3, xmm2 ; q1-=p1
+ psubusb xmm2, xmm4 ; p1-=q1
- paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
- pxor xmm0, xmm0 ;
+ psubusb xmm1, xmm7
+ por xmm2, xmm3 ; abs(p1-q1)
- pxor xmm5, xmm5
- punpcklbw xmm0, xmm2 ;
+ movdqa xmm4, XMMWORD PTR [rdx] ; flimit
- punpckhbw xmm5, xmm2 ;
- psraw xmm0, 11 ;
+ movdqa xmm3, xmm0 ; q0
+ pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
- psraw xmm5, 11
- packsswb xmm0, xmm5
+ mov rdx, arg(4) ; hev get thresh
- movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+ movdqa xmm6, xmm5 ; p0
+ psrlw xmm2, 1 ; abs(p1-q1)/2
- pxor xmm0, xmm0 ; 0
- movdqa xmm5, xmm1 ; abcdefgh
+ psubusb xmm5, xmm3 ; p0-=q0
+ paddb xmm4, xmm4 ; flimit*2 (less than 255)
- punpcklbw xmm0, xmm1 ; e0f0g0h0
- psraw xmm0, 11 ; sign extended shift right by 3
+ psubusb xmm3, xmm6 ; q0-=p0
+ por xmm5, xmm3 ; abs(p0 - q0)
- pxor xmm1, xmm1 ; 0
- punpckhbw xmm1, xmm5 ; a0b0c0d0
+ paddusb xmm5, xmm5 ; abs(p0-q0)*2
+ paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
- psraw xmm1, 11 ; sign extended shift right by 3
- movdqa xmm5, xmm0 ; save results
+ movdqa xmm4, t0 ; hev get abs (q1 - q0)
- packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw xmm5, [ones GLOBAL]
+ movdqa xmm3, t1 ; get abs (p1 - p0)
- paddsw xmm1, [ones GLOBAL]
- psraw xmm5, 1 ; partial shifted one more time for 2nd tap
+ paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- psraw xmm1, 1 ; partial shifted one more time for 2nd tap
- packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+ movdqa xmm2, XMMWORD PTR [rdx] ; hev
- pandn xmm4, xmm5 ; high edge variance additive
+ psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ psubusb xmm4, xmm2 ; hev
- paddsb xmm6, xmm2 ; p0+= p0 add
- pxor xmm6, [t80 GLOBAL] ; unoffset
+ psubusb xmm3, xmm2 ; hev
+ por xmm1, xmm5
- ; mm6=p0 ;
- movdqa xmm1, [rdx] ; p1
- pxor xmm1, [t80 GLOBAL] ; reoffset
+ pxor xmm7, xmm7
+ paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- paddsb xmm1, xmm4 ; p1+= p1 add
- pxor xmm1, [t80 GLOBAL] ; unoffset
- ; mm6 = p0 mm1 = p1
+ pcmpeqb xmm4, xmm5 ; hev
+ pcmpeqb xmm3, xmm3 ; hev
- psubsb xmm3, xmm0 ; q0-= q0 add
- pxor xmm3, [t80 GLOBAL] ; unoffset
+ pcmpeqb xmm1, xmm7 ; mask xmm1
+ pxor xmm4, xmm3 ; hev
+%endmacro
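+; On exit: xmm1 = filter mask (0xff where the edge should be
+; filtered), xmm4 = high edge variance (hev) mask, and t0/t1
+; hold abs(q1-q0)/abs(p1-p0).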
- ; mm3 = q0
- psubsb xmm7, xmm4 ; q1-= q1 add
- pxor xmm7, [t80 GLOBAL] ; unoffset
- ; mm7 = q1
+%macro B_FILTER 1
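+; Normal (inner) edge filter:
+;   Filter  = clamp(3*(q0-p0) + hev(p1-q1)) & mask
+;   Filter1 = clamp(Filter + 4) >> 3, subtracted from q0
+;   Filter2 = clamp(Filter + 3) >> 3, added to p0
+;   (Filter1 + 1) >> 1, masked by ~hev, adjusts p1 and q1
+; %1 selects the source: 0 = u/v rows on the stack, 1 = y plane
+; rows, 2 = transposed data in the srct buffer.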
+%if %1 == 0
+ movdqa xmm2, p1 ; p1
+ movdqa xmm7, q1 ; q1
+%elif %1 == 1
+ movdqa xmm2, [rsi+2*rax] ; p1
+ movdqa xmm7, [rdi] ; q1
+%elif %1 == 2
+ lea rdx, srct
- ; tranpose and write back
- ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
- ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
- movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+ movdqa xmm2, [rdx] ; p1
+ movdqa xmm7, [rdx+48] ; q1
+ movdqa xmm6, [rdx+16] ; p0
+ movdqa xmm0, [rdx+32] ; q0
+%endif
- movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+ pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
- punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
- punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+ psubsb xmm2, xmm7 ; p1 - q1
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
- punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+ pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
+ pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
- punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
- movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+ movdqa xmm3, xmm0 ; q0
+ psubsb xmm0, xmm6 ; q0 - p0
- punpcklwd xmm1, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
- punpckhwd xmm5, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+ paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
- ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
- ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
- ; xmm5 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
- ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
- lea rsi, [rsi+rax*8]
+ paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
- movd [rsi+rax*4+2], xmm2
- psrldq xmm2, 4
+ paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
- movd [rdi+rax*4+2], xmm2
- psrldq xmm2, 4
+ pand xmm1, xmm2 ; mask filter values we don't care about
- movd [rsi+rax*2+2], xmm2
- psrldq xmm2, 4
+ movdqa xmm2, xmm1
- movd [rdi+rax*2+2], xmm2
- movd [rsi+2], xmm6
+ paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+ paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
- psrldq xmm6, 4
- movd [rdi+2], xmm6
+ punpckhbw xmm5, xmm2 ; axbxcxdx
+ punpcklbw xmm2, xmm2 ; exfxgxhx
- psrldq xmm6, 4
- neg rax
+ punpcklbw xmm0, xmm1 ; exfxgxhx
+ psraw xmm5, 11 ; sign extended shift right by 3
- movd [rdi+rax+2], xmm6
- psrldq xmm6, 4
+ punpckhbw xmm1, xmm1 ; axbxcxdx
+ psraw xmm2, 11 ; sign extended shift right by 3
- movd [rdi+rax*2+2], xmm6
- lea rsi, [rsi+rax*8]
+ packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+ psraw xmm0, 11 ; sign extended shift right by 3
- neg rax
- ;;;;;;;;;;;;;;;;;;;;/
- movd [rsi+rax*4+2], xmm1
- psrldq xmm1, 4
+ psraw xmm1, 11 ; sign extended shift right by 3
+ movdqa xmm5, xmm0 ; save results
+
+ packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+ paddsw xmm5, [GLOBAL(ones)]
+
+ paddsw xmm1, [GLOBAL(ones)]
+ psraw xmm5, 1 ; partial shifted one more time for 2nd tap
+
+ psraw xmm1, 1 ; partial shifted one more time for 2nd tap
+
+ paddsb xmm6, xmm2 ; p0+= p0 add
+ packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+
+%if %1 == 0
+ movdqa xmm1, p1 ; p1
+%elif %1 == 1
+ movdqa xmm1, [rsi+2*rax] ; p1
+%elif %1 == 2
+ movdqa xmm1, [rdx] ; p1
+%endif
+ pandn xmm4, xmm5 ; high edge variance additive
+ pxor xmm6, [GLOBAL(t80)] ; unoffset
+
+ pxor xmm1, [GLOBAL(t80)] ; reoffset
+ psubsb xmm3, xmm0 ; q0-= q0 add
+
+ paddsb xmm1, xmm4 ; p1+= p1 add
+ pxor xmm3, [GLOBAL(t80)] ; unoffset
+
+ pxor xmm1, [GLOBAL(t80)] ; unoffset
+ psubsb xmm7, xmm4 ; q1-= q1 add
+
+ pxor xmm7, [GLOBAL(t80)] ; unoffset
+%if %1 == 0
+ lea rsi, [rsi + rcx*2]
+ lea rdi, [rdi + rcx*2]
+ movq MMWORD PTR [rsi], xmm6 ; p0
+ movhps MMWORD PTR [rdi], xmm6
+ movq MMWORD PTR [rsi + rax], xmm1 ; p1
+ movhps MMWORD PTR [rdi + rax], xmm1
+ movq MMWORD PTR [rsi + rcx], xmm3 ; q0
+ movhps MMWORD PTR [rdi + rcx], xmm3
+ movq MMWORD PTR [rsi + rcx*2],xmm7 ; q1
+ movhps MMWORD PTR [rdi + rcx*2],xmm7
+%elif %1 == 1
+ movdqa [rsi+rax], xmm6 ; write back
+ movdqa [rsi+2*rax], xmm1 ; write back
+ movdqa [rsi], xmm3 ; write back
+ movdqa [rdi], xmm7 ; write back
+%endif
+
+%endmacro
- movd [rcx+rax*4+2], xmm1
- psrldq xmm1, 4
- movd [rsi+rax*2+2], xmm1
- psrldq xmm1, 4
+;void vp8_loop_filter_horizontal_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; int count
+;)
+global sym(vp8_loop_filter_horizontal_edge_sse2)
+sym(vp8_loop_filter_horizontal_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- movd [rcx+rax*2+2], xmm1
- psrldq xmm1, 4
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- movd [rsi+2], xmm5
- psrldq xmm5, 4
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step
- movd [rcx+2], xmm5
- psrldq xmm5, 4
+ mov rdx, arg(3) ;limit
+ movdqa xmm7, XMMWORD PTR [rdx]
- neg rax
- movd [rcx+rax+2], xmm5
+ lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing
- psrldq xmm5, 4
- movd [rcx+rax*2+2], xmm5
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 1
+ ; filter and write back the result
+ B_FILTER 1
- add rsp, 96
+ add rsp, 32
pop rsp
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_mbloop_filter_horizontal_edge_sse2
+;void vp8_loop_filter_horizontal_edge_uv_sse2
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -665,335 +333,280 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; const char *thresh,
; int count
;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
-sym(vp8_mbloop_filter_horizontal_edge_sse2):
+global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
+sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
ALIGN_STACK 16, rax
- sub rsp, 32 ; reserve 32 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- mov rdx, arg(3) ;limit
+ sub rsp, 96 ; reserve 96 bytes
+ %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16];
+ %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16];
+ %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16];
+ %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16];
+ %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16];
+
+ mov rsi, arg(0) ; u
+ mov rdi, arg(5) ; v
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
+ mov rcx, rax
+ neg rax ; negate pitch to deal with above border
+
+ mov rdx, arg(3) ;limit
movdqa xmm7, XMMWORD PTR [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
-
- ; calculate breakout conditions
- movdqa xmm2, XMMWORD PTR [rdi+2*rax] ; q3
- movdqa xmm1, XMMWORD PTR [rsi+2*rax] ; q2
-
- movdqa xmm6, xmm1 ; q2
- psubusb xmm1, xmm2 ; q2-=q3
-
-
- psubusb xmm2, xmm6 ; q3-=q2
- por xmm1, xmm2 ; abs(q3-q2)
-
- psubusb xmm1, xmm7
-
- ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit
- movdqa xmm4, XMMWORD PTR [rsi+rax] ; q1
- movdqa xmm3, xmm4 ; q1
-
- psubusb xmm4, xmm6 ; q1-=q2
- psubusb xmm6, xmm3 ; q2-=q1
-
- por xmm4, xmm6 ; abs(q2-q1)
- psubusb xmm4, xmm7
-
- por xmm1, xmm4
- ; mm1 = mask, mm3=q1, mm7 = limit
-
- movdqa xmm4, XMMWORD PTR [rsi] ; q0
- movdqa xmm0, xmm4 ; q0
-
- psubusb xmm4, xmm3 ; q0-=q1
- psubusb xmm3, xmm0 ; q1-=q0
-
- por xmm4, xmm3 ; abs(q0-q1)
- movdqa t0, xmm4 ; save to t0
-
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
- neg rax ; negate pitch to deal with above border
-
- movdqa xmm2, XMMWORD PTR [rsi+4*rax] ; p3
- movdqa xmm4, XMMWORD PTR [rdi+4*rax] ; p2
-
- movdqa xmm5, xmm4 ; p2
- psubusb xmm4, xmm2 ; p2-=p3
-
- psubusb xmm2, xmm5 ; p3-=p2
- por xmm4, xmm2 ; abs(p3 - p2)
-
- psubusb xmm4, xmm7
- por xmm1, xmm4
-
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
- movdqa xmm4, XMMWORD PTR [rsi+2*rax] ; p1
- movdqa xmm3, xmm4 ; p1
-
- psubusb xmm4, xmm5 ; p1-=p2
- psubusb xmm5, xmm3 ; p2-=p1
-
- por xmm4, xmm5 ; abs(p2 - p1)
- psubusb xmm4, xmm7
-
- por xmm1, xmm4
-
- movdqa xmm2, xmm3 ; p1
-
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1)
- movdqa xmm4, XMMWORD PTR [rsi+rax] ; p0
- movdqa xmm5, xmm4 ; p0
-
- psubusb xmm4, xmm3 ; p0-=p1
- psubusb xmm3, xmm5 ; p1-=p0
-
- por xmm4, xmm3 ; abs(p1 - p0)
- movdqa t1, xmm4 ; save to t1
-
- psubusb xmm4, xmm7
- por xmm1, xmm4
+ lea rsi, [rsi + rcx]
+ lea rdi, [rdi + rcx]
- ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm5 = p0
- movdqa xmm3, XMMWORD PTR [rdi] ; q1
- movdqa xmm4, xmm3 ; q1
- psubusb xmm3, xmm2 ; q1-=p1
- psubusb xmm2, xmm4 ; p1-=q1
- por xmm2, xmm3 ; abs(p1-q1)
- pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero
- psrlw xmm2, 1 ; abs(p1-q1)/2
-
- movdqa xmm6, xmm5 ; p0
- movdqa xmm3, xmm0 ; q0
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 0
+ ; filter and write back the result
+ B_FILTER 0
- psubusb xmm5, xmm3 ; p0-=q0
- psubusb xmm3, xmm6 ; q0-=p0
-
- por xmm5, xmm3 ; abs(p0 - q0)
- paddusb xmm5, xmm5 ; abs(p0-q0)*2
- paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+ add rsp, 96
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- mov rdx, arg(2) ;flimit ; get flimit
- movdqa xmm2, XMMWORD PTR [rdx] ;
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
- paddb xmm7, xmm2 ; flimit * 2 + limit (less than 255)
- psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
- por xmm1, xmm5
- pxor xmm5, xmm5
- pcmpeqb xmm1, xmm5 ; mask mm1
- ; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm6 = p0,
+%macro MB_FILTER_AND_WRITEBACK 1
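+; Macroblock (outer) edge filter:
+; u = clamp((63 + Filter2 * {27,18,9}) >> 7), roughly 3/7, 2/7
+; and 1/7 of the difference across the boundary, is added to
+; p0/p1/p2 and subtracted from q0/q1/q2.
+; %1 selects source and destination: 0 = u/v rows on the stack,
+; 1 = y plane rows, 2 = transposed data in the srct buffer.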
+%if %1 == 0
+ movdqa xmm2, p1 ; p1
+ movdqa xmm7, q1 ; q1
+%elif %1 == 1
+ movdqa xmm2, [rsi+2*rax] ; p1
+ movdqa xmm7, [rdi] ; q1
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movdqa xmm7, XMMWORD PTR [rdx] ;
+ mov rcx, rax
+ neg rcx
+%elif %1 == 2
+ lea rdx, srct
- movdqa xmm4, t0 ; get abs (q1 - q0)
- psubusb xmm4, xmm7
+ movdqa xmm2, [rdx+32] ; p1
+ movdqa xmm7, [rdx+80] ; q1
+ movdqa xmm6, [rdx+48] ; p0
+ movdqa xmm0, [rdx+64] ; q0
+%endif
- movdqa xmm3, t1 ; get abs (p1 - p0)
- psubusb xmm3, xmm7
+ pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
- paddb xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb xmm4, xmm5
+ psubsb xmm2, xmm7 ; p1 - q1
+ movdqa xmm3, xmm0 ; q0
- pcmpeqb xmm5, xmm5
- pxor xmm4, xmm5
- ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0)
- ; mm6 = p0, mm4=hev
- ; start work on filters
- movdqa xmm2, XMMWORD PTR [rsi+2*rax] ; p1
- movdqa xmm7, XMMWORD PTR [rdi] ; q1
+ psubsb xmm0, xmm6 ; q0 - p0
- pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1)
- psubsb xmm2, xmm7 ; p1 - q1
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
+ paddsb xmm2, xmm0 ; 2 * (q0 - p0)
- pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
- movdqa xmm3, xmm0 ; q0
+ paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1)
- psubsb xmm0, xmm6 ; q0 - p0
- paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1)
+ pand xmm1, xmm2 ; mask filter values we don't care about
- paddsb xmm2, xmm0 ; 2 * (q0 - p0)
- paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1)
-
- pand xmm1, xmm2 ; mask filter values we don't care about
- ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0
- movdqa xmm2, xmm1 ; vp8_filter
- pand xmm2, xmm4; ; Filter2 = vp8_filter & hev
+ movdqa xmm2, xmm1 ; vp8_filter
+ pand xmm2, xmm4 ; Filter2 = vp8_filter & hev
+ pxor xmm0, xmm0
- movdqa xmm5, xmm2 ;
- paddsb xmm5, [t3 GLOBAL];
+ pandn xmm4, xmm1 ; vp8_filter&=~hev
+ pxor xmm1, xmm1
- pxor xmm0, xmm0 ; 0
- pxor xmm7, xmm7 ; 0
+ punpcklbw xmm0, xmm4 ; Filter 2 (hi)
+ movdqa xmm5, xmm2
- punpcklbw xmm0, xmm5 ; e0f0g0h0
- psraw xmm0, 11 ; sign extended shift right by 3
+ punpckhbw xmm1, xmm4 ; Filter 2 (lo)
+ paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3)
- punpckhbw xmm7, xmm5 ; a0b0c0d0
- psraw xmm7, 11 ; sign extended shift right by 3
+ pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9
- packsswb xmm0, xmm7 ; Filter2 >>=3;
- movdqa xmm5, xmm0 ; Filter2
+ pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9
- paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
- pxor xmm0, xmm0 ; 0
+ punpckhbw xmm7, xmm5 ; axbxcxdx
+ paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4)
- pxor xmm7, xmm7 ; 0
- punpcklbw xmm0, xmm2 ; e0f0g0h0
+ punpcklbw xmm5, xmm5 ; exfxgxhx
+ psraw xmm7, 11 ; sign extended shift right by 3
- psraw xmm0, 11 ; sign extended shift right by 3
- punpckhbw xmm7, xmm2 ; a0b0c0d0
+ psraw xmm5, 11 ; sign extended shift right by 3
+ punpckhbw xmm4, xmm2 ; axbxcxdx
- psraw xmm7, 11 ; sign extended shift right by 3
- packsswb xmm0, xmm7 ; Filter2 >>=3;
+ punpcklbw xmm2, xmm2 ; exfxgxhx
+ psraw xmm4, 11 ; sign extended shift right by 3
- ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0
- psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
- paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2
+ packsswb xmm5, xmm7 ; Filter2 >>=3;
+ psraw xmm2, 11 ; sign extended shift right by 3
- ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0
- ; vp8_filter &= ~hev;
- ; Filter2 = vp8_filter;
- pandn xmm4, xmm1 ; vp8_filter&=~hev
+ packsswb xmm2, xmm4 ; Filter1 >>=3;
+ movdqa xmm7, xmm1
+ paddsb xmm6, xmm5 ; ps0 = ps0 + Filter2
+ movdqa xmm4, xmm1
- ; mm3=qs0, mm4=filter2, mm6=ps0
+ psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1
+ movdqa xmm5, xmm0
- ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
- ; s = vp8_signed_char_clamp(qs0 - u);
- ; *oq0 = s^0x80;
- ; s = vp8_signed_char_clamp(ps0 + u);
- ; *op0 = s^0x80;
- pxor xmm0, xmm0
- pxor xmm1, xmm1
+ movdqa xmm2, xmm5
+ paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63
- pxor xmm2, xmm2
- punpcklbw xmm1, xmm4
+ paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63
+ paddw xmm5, xmm5 ; Filter 2 (hi) * 18
- punpckhbw xmm2, xmm4
- pmulhw xmm1, [s27 GLOBAL]
+ paddw xmm7, xmm7 ; Filter 2 (lo) * 18
+ paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63
- pmulhw xmm2, [s27 GLOBAL]
- paddw xmm1, [s63 GLOBAL]
+ paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63
+ paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63
- paddw xmm2, [s63 GLOBAL]
- psraw xmm1, 7
+ paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63
+ psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7
- psraw xmm2, 7
- packsswb xmm1, xmm2
+ psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7
+ psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7
- psubsb xmm3, xmm1
- paddsb xmm6, xmm1
+ packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
+ psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7
- pxor xmm3, [t80 GLOBAL]
- pxor xmm6, [t80 GLOBAL]
+ psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7
+ packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
- movdqa XMMWORD PTR [rsi+rax], xmm6
- movdqa XMMWORD PTR [rsi], xmm3
+ psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7
- ; roughly 2/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
- ; s = vp8_signed_char_clamp(qs1 - u);
- ; *oq1 = s^0x80;
- ; s = vp8_signed_char_clamp(ps1 + u);
- ; *op1 = s^0x80;
- pxor xmm1, xmm1
- pxor xmm2, xmm2
+ packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
- punpcklbw xmm1, xmm4
- punpckhbw xmm2, xmm4
+ psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3)
+ paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 + u3)
- pmulhw xmm1, [s18 GLOBAL]
- pmulhw xmm2, [s18 GLOBAL]
+%if %1 == 0
+ movdqa xmm5, q2 ; q2
+ movdqa xmm1, q1 ; q1
+ movdqa xmm4, p1 ; p1
+ movdqa xmm7, p2 ; p2
- paddw xmm1, [s63 GLOBAL]
- paddw xmm2, [s63 GLOBAL]
+%elif %1 == 1
+ movdqa xmm5, XMMWORD PTR [rdi+rcx] ; q2
+ movdqa xmm1, XMMWORD PTR [rdi] ; q1
+ movdqa xmm4, XMMWORD PTR [rsi+rax*2] ; p1
+ movdqa xmm7, XMMWORD PTR [rdi+rax*4] ; p2
+%elif %1 == 2
+ movdqa xmm5, XMMWORD PTR [rdx+96] ; q2
+ movdqa xmm1, XMMWORD PTR [rdx+80] ; q1
+ movdqa xmm4, XMMWORD PTR [rdx+32] ; p1
+ movdqa xmm7, XMMWORD PTR [rdx+16] ; p2
+%endif
- psraw xmm1, 7
- psraw xmm2, 7
+ pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80
+ pxor xmm6, [GLOBAL(t80)] ; *op0 = sp^0x80
- packsswb xmm1, xmm2
+ pxor xmm1, [GLOBAL(t80)]
+ pxor xmm4, [GLOBAL(t80)]
- movdqa xmm3, XMMWORD PTR [rdi]
- movdqa xmm6, XMMWORD PTR [rsi+rax*2] ; p1
+ psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2)
+ paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 + u2)
- pxor xmm3, [t80 GLOBAL]
- pxor xmm6, [t80 GLOBAL]
+ pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80;
+ pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80;
- paddsb xmm6, xmm1
- psubsb xmm3, xmm1
+ pxor xmm7, [GLOBAL(t80)]
+ pxor xmm5, [GLOBAL(t80)]
- pxor xmm6, [t80 GLOBAL]
- pxor xmm3, [t80 GLOBAL]
+ paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 + u)
+ psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u)
- movdqa XMMWORD PTR [rdi], xmm3
- movdqa XMMWORD PTR [rsi+rax*2],xmm6
+ pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80;
+ pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80;
- ; roughly 1/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
- ; s = vp8_signed_char_clamp(qs2 - u);
- ; *oq2 = s^0x80;
- ; s = vp8_signed_char_clamp(ps2 + u);
- ; *op2 = s^0x80;
- pxor xmm1, xmm1
- pxor xmm2, xmm2
+%if %1 == 0
+ lea rsi, [rsi+rcx*2]
+ lea rdi, [rdi+rcx*2]
- punpcklbw xmm1, xmm4
- punpckhbw xmm2, xmm4
+ movq MMWORD PTR [rsi], xmm6 ; p0
+ movhps MMWORD PTR [rdi], xmm6
+ movq MMWORD PTR [rsi + rcx], xmm3 ; q0
+ movhps MMWORD PTR [rdi + rcx], xmm3
- pmulhw xmm1, [s9 GLOBAL]
- pmulhw xmm2, [s9 GLOBAL]
+ movq MMWORD PTR [rsi+rcx*2], xmm1 ; q1
+ movhps MMWORD PTR [rdi+rcx*2], xmm1
- paddw xmm1, [s63 GLOBAL]
- paddw xmm2, [s63 GLOBAL]
+ movq MMWORD PTR [rsi + rax], xmm4 ; p1
+ movhps MMWORD PTR [rdi + rax], xmm4
- psraw xmm1, 7
- psraw xmm2, 7
+ movq MMWORD PTR [rsi+rax*2], xmm7 ; p2
+ movhps MMWORD PTR [rdi+rax*2], xmm7
- packsswb xmm1, xmm2
+ lea rsi, [rsi + rcx]
+ lea rdi, [rdi + rcx]
+ movq MMWORD PTR [rsi+rcx*2], xmm5 ; q2
+ movhps MMWORD PTR [rdi+rcx*2], xmm5
+%elif %1 == 1
+ movdqa XMMWORD PTR [rdi+rcx], xmm5 ; q2
+ movdqa XMMWORD PTR [rdi], xmm1 ; q1
+ movdqa XMMWORD PTR [rsi], xmm3 ; q0
+ movdqa XMMWORD PTR [rsi+rax ],xmm6 ; p0
+ movdqa XMMWORD PTR [rsi+rax*2],xmm4 ; p1
+ movdqa XMMWORD PTR [rdi+rax*4],xmm7 ; p2
+%elif %1 == 2
+ movdqa XMMWORD PTR [rdx+80], xmm1 ; q1
+ movdqa XMMWORD PTR [rdx+64], xmm3 ; q0
+ movdqa XMMWORD PTR [rdx+48], xmm6 ; p0
+ movdqa XMMWORD PTR [rdx+32], xmm4 ; p1
+%endif
+%endmacro
- movdqa xmm6, XMMWORD PTR [rdi+rax*4]
- neg rax
- movdqa xmm3, XMMWORD PTR [rdi+rax ]
+;void vp8_mbloop_filter_horizontal_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; int count
+;)
+global sym(vp8_mbloop_filter_horizontal_edge_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- pxor xmm6, [t80 GLOBAL]
- pxor xmm3, [t80 GLOBAL]
+ ALIGN_STACK 16, rax
+ sub rsp, 32 ; reserve 32 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- paddsb xmm6, xmm1
- psubsb xmm3, xmm1
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixel_step
- pxor xmm6, [t80 GLOBAL]
- pxor xmm3, [t80 GLOBAL]
+ mov rdx, arg(3) ;limit
+ movdqa xmm7, XMMWORD PTR [rdx]
- movdqa XMMWORD PTR [rdi+rax ], xmm3
- neg rax
+ lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing
- movdqa XMMWORD PTR [rdi+rax*4], xmm6
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 1
+ ; filter and write back the results
+ MB_FILTER_AND_WRITEBACK 1
add rsp, 32
pop rsp
@@ -1001,561 +614,751 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_mbloop_filter_vertical_edge_sse2
+;void vp8_mbloop_filter_horizontal_edge_uv_sse2
;(
-; unsigned char *src_ptr,
+; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
-; int count
+; unsigned char *v
;)
-global sym(vp8_mbloop_filter_vertical_edge_sse2)
-sym(vp8_mbloop_filter_vertical_edge_sse2):
+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
ALIGN_STACK 16, rax
- sub rsp, 160 ; reserve 160 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[128];
+ sub rsp, 96 ; reserve 96 bytes
+ %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16];
+ %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16];
+ %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16];
+ %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16];
+ %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16];
+
+ mov rsi, arg(0) ; u
+ mov rdi, arg(5) ; v
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
+ mov rcx, rax
+ neg rax ; negate pitch to deal with above border
+
+ mov rdx, arg(3) ;limit
+ movdqa xmm7, XMMWORD PTR [rdx]
+
+ lea rsi, [rsi + rcx]
+ lea rdi, [rdi + rcx]
+
+ ; calculate breakout conditions and high edge variance
+ LFH_FILTER_AND_HEV_MASK 0
+ ; filter and write back the results
+ MB_FILTER_AND_WRITEBACK 0
+
+ add rsp, 96
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
+%macro TRANSPOSE_16X8 2
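+; Reads sixteen rows of eight pixels and transposes them into
+; eight registers of sixteen columns; %1 = 1 continues within
+; the same (y) plane, %1 = 0 switches to the v plane via arg(5).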
+ movq xmm4, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+ movq xmm1, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
+ movq xmm0, QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
+ movq xmm7, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+ movq xmm5, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40
+ movq xmm2, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50
- lea rsi, [rsi + rax*4 - 4]
- lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
- mov rcx, rax
- neg rcx
+ movq xmm1, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
- ; Transpose
- movq xmm0, QWORD PTR [rdi+rax*2] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
- movq xmm7, QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+ movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+ punpcklbw xmm0, xmm7 ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20
- punpcklbw xmm7, xmm0 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
- movq xmm0, QWORD PTR [rsi+rax] ;
+ movq xmm7, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+ punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+%if %1
+ lea rsi, [rsi+rax*8]
+%else
+ mov rsi, arg(5) ; v_ptr
+%endif
- movq xmm5, QWORD PTR [rsi] ;
- punpcklbw xmm5, xmm0 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+ movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+ punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
- movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
- punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+ punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
- punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
- movq xmm7, QWORD PTR [rsi + rcx] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+ punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
+%if %1
+ lea rdi, [rdi+rax*8]
+%else
+ lea rsi, [rsi - 4]
+%endif
- movq xmm0, QWORD PTR [rsi + rcx*2] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
- punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
+ punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+%if %1
+ lea rdx, srct
+%else
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+%endif
- movq xmm4, QWORD PTR [rsi + rcx*4] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
- movq xmm7, QWORD PTR [rdi + rcx*4] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
+ movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+ punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
- punpcklbw xmm4, xmm7 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
- movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+ movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+ punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
- punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
- punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+ punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
- movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
- movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+ punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
- punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
- punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+ punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
- punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
- punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+ movdqa t0, xmm2 ; save to free XMM2
+ movq xmm2, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+ movq xmm6, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
+ movq xmm0, QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
+ movq xmm5, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+ movq xmm1, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
- movdqa t0, xmm2 ; save to free XMM2
- ;movdqa t1, xmm3
+ punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
- ; XMM3 XMM4 XMM7 in use
- lea rsi, [rsi+rax*8]
- lea rdi, [rdi+rax*8]
+ movq xmm6, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
- movq xmm6, QWORD PTR [rdi+rax*2] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
- movq xmm5, QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+ punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
- punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
- movq xmm6, QWORD PTR [rsi+rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
+ movq xmm5, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
- movq xmm1, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
- punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+ punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
- movdqa xmm6, xmm1 ;
- punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
+ movq xmm6, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
- punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
- movq xmm5, QWORD PTR [rsi+rcx] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+ punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
- movq xmm0, QWORD PTR [rsi+rcx*2] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
- punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+ movdqa xmm6, xmm1 ;
+ punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
- movq xmm2, QWORD PTR [rsi+rcx*4] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
- movq xmm5, QWORD PTR [rdi+rcx*4] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
+ punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+ movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
- punpcklbw xmm2, xmm5 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
- movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+ punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
- punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
- punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+ punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
movdqa xmm0, xmm5
- punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+ punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+ punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+ movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
- punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
- movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+ punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
- punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
- punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
+ punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
+ movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
- movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
- punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+ punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+ punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
+%if %2
+ movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- lea rdx, srct
- punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
+ punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
+ movdqa [rdx], xmm2 ; save 2
+
+ movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+ punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+ movdqa [rdx+16], xmm3 ; save 3
+
+ punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+ movdqa [rdx+32], xmm4 ; save 4
+ movdqa [rdx+48], xmm5 ; save 5
+ movdqa xmm1, t0 ; get
+
+ movdqa xmm2, xmm1 ;
+ punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
+ punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+%else
+ movdqa [rdx+112], xmm7 ; save 7
- movdqa [rdx+112], xmm7 ; save 7
- movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ movdqa [rdx+96], xmm6 ; save 6
- movdqa [rdx+96], xmm6 ; save 6
- punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+ punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
- punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
- movdqa [rdx+32], xmm2 ; save 2
+ punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
- punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ movdqa [rdx+32], xmm2 ; save 2
- movdqa [rdx+48], xmm3 ; save 3
- punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+ movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+ punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- movdqa [rdx+64], xmm4 ; save 4
- movdqa [rdx+80], xmm5 ; save 5
+ movdqa [rdx+48], xmm3 ; save 3
- movdqa xmm1, t0 ; get
- movdqa xmm2, xmm1 ;
+ punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
- punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
- punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+ movdqa [rdx+64], xmm4 ; save 4
+ movdqa [rdx+80], xmm5 ; save 5
+ movdqa xmm1, t0 ; get
+
+ movdqa xmm2, xmm1
+ punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
+ punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
movdqa [rdx+16], xmm1
+
movdqa [rdx], xmm2
+%endif
+%endmacro
+
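For reference, the effect of TRANSPOSE_16X8 can be sketched in scalar C (the function and array names below are illustrative, not part of the source): eight bytes are gathered from each of sixteen rows straddling the edge and regrouped so each output row holds one pixel column, turning the vertical edge into a horizontal one the SIMD filter can process.

/* Illustrative scalar equivalent of TRANSPOSE_16X8 (hypothetical names):
   16 rows x 8 bytes in, 8 rows x 16 bytes out. */
static void transpose_16x8_ref(const unsigned char *src, int stride,
                               unsigned char out[8][16])
{
    int r, c;
    for (r = 0; r < 16; r++)
        for (c = 0; c < 8; c++)
            out[c][r] = src[r * stride + c];
}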
+%macro LFV_FILTER_MASK_HEV_MASK 1
+ movdqa xmm0, xmm6 ; q2
+ psubusb xmm0, xmm7 ; q2-q3
- movdqa xmm0, xmm6 ; q2
- psubusb xmm0, xmm7 ; q2-q3
+ psubusb xmm7, xmm6 ; q3-q2
+ movdqa xmm4, xmm5 ; q1
- psubusb xmm7, xmm6 ; q3-q2
- por xmm7, xmm0 ; abs (q3-q2)
+ por xmm7, xmm0 ; abs (q3-q2)
+ psubusb xmm4, xmm6 ; q1-q2
- movdqa xmm1, xmm5 ; q1
- psubusb xmm1, xmm6 ; q1-q2
+ movdqa xmm0, xmm1
+ psubusb xmm6, xmm5 ; q2-q1
- psubusb xmm6, xmm5 ; q2-q1
- por xmm6, xmm1 ; abs (q2-q1)
+ por xmm6, xmm4 ; abs (q2-q1)
+ psubusb xmm0, xmm2 ; p2 - p3;
- ;/*
- ;movdqa xmm0, xmm4 ; q0
- ;psubusb xmm0 xmm5 ; q0-q1
- ;
- ;pusbusb xmm5, xmm4 ; q1-q0
- ;por xmm5, xmm0 ; abs (q1-q0)
- ;*/
+ psubusb xmm2, xmm1 ; p3 - p2;
+ por xmm0, xmm2 ; abs(p2-p3)
+%if %1
+ movdqa xmm2, [rdx] ; p1
+%else
+ movdqa xmm2, [rdx+32] ; p1
+%endif
+ movdqa xmm5, xmm2 ; p1
+ pmaxub xmm0, xmm7
- movdqa xmm1, [rdx+16] ; p2
- movdqa xmm0, xmm1
+ psubusb xmm5, xmm1 ; p1-p2
+ psubusb xmm1, xmm2 ; p2-p1
- psubusb xmm0, xmm2 ; p2 - p3;
- psubusb xmm2, xmm1 ; p3 - p2;
+ movdqa xmm7, xmm3 ; p0
+ psubusb xmm7, xmm2 ; p0-p1
- por xmm0, xmm2 ; abs(p2-p3)
+ por xmm1, xmm5 ; abs(p2-p1)
+ pmaxub xmm0, xmm6
- movdqa xmm2, [rdx+32] ; p1
- movdqa xmm5, xmm2 ; p1
+ pmaxub xmm0, xmm1
+ movdqa xmm1, xmm2 ; p1
- psubusb xmm5, xmm1 ; p1-p2
- psubusb xmm1, xmm2 ; p2-p1
+ psubusb xmm2, xmm3 ; p1-p0
+ lea rdx, srct
- por xmm1, xmm5 ; abs(p2-p1)
- mov rdx, arg(3) ;limit
+ por xmm2, xmm7 ; abs(p1-p0)
- movdqa xmm4, [rdx] ; limit
- psubusb xmm7, xmm4 ;
+ movdqa t0, xmm2 ; save abs(p1-p0)
+ pmaxub xmm0, xmm2
- psubusb xmm0, xmm4 ; abs(p3-p2) > limit
- psubusb xmm1, xmm4 ; abs(p2-p1) > limit
+%if %1
+ movdqa xmm5, [rdx+32] ; q0
+ movdqa xmm7, [rdx+48] ; q1
+%else
+ movdqa xmm5, [rdx+64] ; q0
+ movdqa xmm7, [rdx+80] ; q1
+%endif
+ mov rdx, arg(3) ; limit
- psubusb xmm6, xmm4 ; abs(q2-q1) > limit
- por xmm7, xmm6 ; or
+ movdqa xmm6, xmm5 ; q0
+ movdqa xmm2, xmm7 ; q1
- por xmm0, xmm1 ;
- por xmm0, xmm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
+ psubusb xmm5, xmm7 ; q0-q1
+ psubusb xmm7, xmm6 ; q1-q0
- movdqa xmm1, xmm2 ; p1
+ por xmm7, xmm5 ; abs(q1-q0)
- movdqa xmm7, xmm3 ; p0
- psubusb xmm7, xmm2 ; p0-p1
+ movdqa t1, xmm7 ; save abs(q1-q0)
- psubusb xmm2, xmm3 ; p1-p0
- por xmm2, xmm7 ; abs(p1-p0)
+ movdqa xmm4, XMMWORD PTR [rdx]; limit
- movdqa t0, xmm2 ; save abs(p1-p0)
- lea rdx, srct
+ pmaxub xmm0, xmm7
+ mov rdx, arg(2) ; flimit
- psubusb xmm2, xmm4 ; abs(p1-p0)>limit
- por xmm0, xmm2 ; mask
+ psubusb xmm0, xmm4
+ movdqa xmm5, xmm2 ; q1
- movdqa xmm5, [rdx+64] ; q0
- movdqa xmm7, [rdx+80] ; q1
+ psubusb xmm5, xmm1 ; q1-=p1
+ psubusb xmm1, xmm2 ; p1-=q1
- movdqa xmm6, xmm5 ; q0
- movdqa xmm2, xmm7 ; q1
- psubusb xmm5, xmm7 ; q0-q1
+ por xmm5, xmm1 ; abs(p1-q1)
+ movdqa xmm1, xmm3 ; p0
- psubusb xmm7, xmm6 ; q1-q0
- por xmm7, xmm5 ; abs(q1-q0)
+ pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
+ psubusb xmm1, xmm6 ; p0-q0
- movdqa t1, xmm7 ; save abs(q1-q0)
- psubusb xmm7, xmm4 ; abs(q1-q0)> limit
+ psrlw xmm5, 1 ; abs(p1-q1)/2
+ psubusb xmm6, xmm3 ; q0-p0
- por xmm0, xmm7 ; mask
+ movdqa xmm2, XMMWORD PTR [rdx]; flimit
- movdqa xmm5, xmm2 ; q1
- psubusb xmm5, xmm1 ; q1-=p1
- psubusb xmm1, xmm2 ; p1-=q1
- por xmm5, xmm1 ; abs(p1-q1)
- pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero
- psrlw xmm5, 1 ; abs(p1-q1)/2
+ mov rdx, arg(4) ; get thresh
- mov rdx, arg(2) ;flimit ;
- movdqa xmm2, [rdx] ; flimit
+ por xmm1, xmm6 ; abs(q0-p0)
+ paddb xmm2, xmm2 ; flimit*2 (less than 255)
- movdqa xmm1, xmm3 ; p0
- movdqa xmm7, xmm6 ; q0
- psubusb xmm1, xmm7 ; p0-q0
- psubusb xmm7, xmm3 ; q0-p0
- por xmm1, xmm7 ; abs(q0-p0)
- paddusb xmm1, xmm1 ; abs(q0-p0)*2
- paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+ movdqa xmm6, t0 ; get abs (p1 - p0)
- paddb xmm2, xmm2 ; flimit*2 (less than 255)
- paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
+ paddusb xmm1, xmm1 ; abs(q0-p0)*2
+
+ movdqa xmm3, t1 ; get abs (q1 - q0)
+
+ movdqa xmm7, XMMWORD PTR [rdx] ; thresh
+
+ paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
+ psubusb xmm6, xmm7 ; abs(p1 - p0) > thresh
+
+ paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
+ psubusb xmm3, xmm7 ; abs(q1 - q0) > thresh
+
+ psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
+ por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+
+ por xmm1, xmm0 ; mask
+ pcmpeqb xmm6, xmm0
- psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
- por xmm1, xmm0; ; mask
pxor xmm0, xmm0
+ pcmpeqb xmm4, xmm4
+
pcmpeqb xmm1, xmm0
+ pxor xmm4, xmm6
+%endmacro
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movdqa xmm7, [rdx]
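The saturating-subtract/compare chain above mirrors VP8's scalar breakout test spelled out in the comments. As a rough C sketch (plain integer arithmetic, illustrative names), the mask and high-edge-variance decisions per pixel column come down to:

#include <stdlib.h>

/* Sketch of the per-column decision built by LFV_FILTER_MASK_HEV_MASK:
   filter only where every neighbour difference is within 'limit' and
   the edge difference is within flimit * 2 + limit. */
static int filter_mask_ref(int limit, int flimit,
                           int p3, int p2, int p1, int p0,
                           int q0, int q1, int q2, int q3)
{
    return abs(p3 - p2) <= limit && abs(p2 - p1) <= limit
        && abs(p1 - p0) <= limit && abs(q1 - q0) <= limit
        && abs(q2 - q1) <= limit && abs(q3 - q2) <= limit
        && abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit;
}

/* High edge variance: either side differs from its neighbour by more
   than 'thresh'. */
static int hev_ref(int thresh, int p1, int p0, int q0, int q1)
{
    return abs(p1 - p0) > thresh || abs(q1 - q0) > thresh;
}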
+%macro BV_TRANSPOSE 0
+ ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+ ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+ movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
- movdqa xmm4, t0 ; get abs (q1 - q0)
- psubusb xmm4, xmm7 ; abs(q1 - q0) > thresh
+ movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
- movdqa xmm3, t1 ; get abs (p1 - p0)
- psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
+ punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
- por xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb xmm4, xmm0
+ punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
- pcmpeqb xmm0, xmm0
- pxor xmm4, xmm0
+ movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+ punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+ punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+ movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
- ; start work on filters
- lea rdx, srct
+ punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
- ; start work on filters
- movdqa xmm2, [rdx+32] ; p1
- movdqa xmm7, [rdx+80] ; q1
+ punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+ ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+ ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+ ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+ ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+%endmacro
- pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+%macro BV_WRITEBACK 2
+ movd [rsi+2], %1
+ psrldq %1, 4
- psubsb xmm2, xmm7 ; p1 - q1
- movdqa xmm6, [rdx+48] ; p0
+ movd [rdi+2], %1
+ psrldq %1, 4
- movdqa xmm0, [rdx+64] ; q0
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
+ movd [rsi+2*rax+2], %1
+ psrldq %1, 4
- pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
- movdqa xmm3, xmm0 ; q0
+ movd [rdi+2*rax+2], %1
- psubsb xmm0, xmm6 ; q0 - p0
- paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1)
+ movd [rsi+4*rax+2], %2
+ psrldq %2, 4
- paddsb xmm2, xmm0 ; 2 * (q0 - p0)
- paddsb xmm2, xmm0 ; 3 * (q0 - p0)+ (p1 - q1)
+ movd [rdi+4*rax+2], %2
+ psrldq %2, 4
- pand xmm1, xmm2 ; mask filter values we don't care about
+ movd [rsi+2*rcx+2], %2
+ psrldq %2, 4
- ; xmm1 = vp8_filter, xmm4=hev, xmm6=ps0, xmm3=qs0
- movdqa xmm2, xmm1 ; vp8_filter
- pand xmm2, xmm4; ; Filter2 = vp8_filter & hev
+ movd [rdi+2*rcx+2], %2
+%endmacro
- movdqa xmm5, xmm2
- paddsb xmm5, [t3 GLOBAL]
- pxor xmm0, xmm0 ; 0
- pxor xmm7, xmm7 ; 0
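BV_TRANSPOSE and BV_WRITEBACK only move the four filtered center pixels back out. In scalar terms (a sketch with hypothetical names, where src is the strip base four pixels left of the edge, matching the lea rsi, [rsi - 4] set up below):

/* Only p1, p0, q0, q1 (columns 2..5 of each 8-byte strip) change, so
   just those four bytes are stored back per row. */
static void writeback_4cols_ref(unsigned char *src, int stride,
                                const unsigned char filtered[8][16])
{
    int r, c;
    for (r = 0; r < 16; r++)
        for (c = 2; c < 6; c++)
            src[r * stride + c] = filtered[c][r];
}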
+;void vp8_loop_filter_vertical_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; int count
+;)
+global sym(vp8_loop_filter_vertical_edge_sse2)
+sym(vp8_loop_filter_vertical_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- punpcklbw xmm0, xmm5 ; e0f0g0h0
- psraw xmm0, 11 ; sign extended shift right by 3
+ ALIGN_STACK 16, rax
+ sub rsp, 96 ; reserve 96 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
- punpckhbw xmm7, xmm5 ; a0b0c0d0
- psraw xmm7, 11 ; sign extended shift right by 3
+ mov rsi, arg(0) ; src_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
- packsswb xmm0, xmm7 ; Filter2 >>=3;
- movdqa xmm5, xmm0 ; Filter2
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax*2+rax]
- paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
- pxor xmm0, xmm0 ; 0
+ ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+ TRANSPOSE_16X8 1, 1
- pxor xmm7, xmm7 ; 0
- punpcklbw xmm0, xmm2 ; e0f0g0h0
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 1
- psraw xmm0, 11 ; sign extended shift right by 3
- punpckhbw xmm7, xmm2 ; a0b0c0d0
+ ; start work on filters
+ B_FILTER 2
- psraw xmm7, 11 ; sign extended shift right by 3
- packsswb xmm0, xmm7 ; Filter2 >>=3;
+ ; transpose and write back - only works on q1, q0, p0, p1
+ BV_TRANSPOSE
+ ; store 16-line result
- ; xmm0= filter2 xmm1 = vp8_filter, xmm3 =qs0 xmm5=s xmm4 =hev xmm6=ps0
- psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
- paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2
+ lea rdx, [rax]
+ neg rdx
+ BV_WRITEBACK xmm1, xmm5
- ; xmm1=vp8_filter, xmm3=qs0, xmm4 =hev xmm6=ps0
- ; vp8_filter &= ~hev;
- ; Filter2 = vp8_filter;
- pandn xmm4, xmm1 ; vp8_filter&=~hev
+ lea rsi, [rsi+rdx*8]
+ lea rdi, [rdi+rdx*8]
+ BV_WRITEBACK xmm2, xmm6
- ; xmm3=qs0, xmm4=filter2, xmm6=ps0
- ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
- ; s = vp8_signed_char_clamp(qs0 - u);
- ; *oq0 = s^0x80;
- ; s = vp8_signed_char_clamp(ps0 + u);
- ; *op0 = s^0x80;
- pxor xmm0, xmm0
- pxor xmm1, xmm1
+ add rsp, 96
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- pxor xmm2, xmm2
- punpcklbw xmm1, xmm4
- punpckhbw xmm2, xmm4
- pmulhw xmm1, [s27 GLOBAL]
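Taken together, the routine above follows a transpose / filter / write-back pattern. An illustrative outline in C (helper names reuse the sketches earlier in this file and are not part of the source):

/* Outline of the vertical-edge strategy: transpose 16x8 so the edge is
   horizontal, filter in place, then store back only the four pixels
   per row that can change. */
static void loop_filter_vertical_edge_ref(unsigned char *src, int stride,
                                          int flimit, int limit, int thresh)
{
    unsigned char block[8][16];
    (void)flimit; (void)limit; (void)thresh;  /* feed the elided step */
    transpose_16x8_ref(src - 4, stride, block);   /* TRANSPOSE_16X8 */
    /* ... per column: apply filter_mask_ref()/hev_ref() and the
       B_FILTER update to block[2..5][col] (p1, p0, q0, q1) ... */
    writeback_4cols_ref(src - 4, stride, block);  /* BV_WRITEBACK */
}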
+;void vp8_loop_filter_vertical_edge_uv_sse2
+;(
+; unsigned char *u,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; unsigned char *v
+;)
+global sym(vp8_loop_filter_vertical_edge_uv_sse2)
+sym(vp8_loop_filter_vertical_edge_uv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- pmulhw xmm2, [s27 GLOBAL]
- paddw xmm1, [s63 GLOBAL]
+ ALIGN_STACK 16, rax
+ sub rsp, 96 ; reserve 96 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[64];
- paddw xmm2, [s63 GLOBAL]
- psraw xmm1, 7
+ mov rsi, arg(0) ; u_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
- psraw xmm2, 7
- packsswb xmm1, xmm2
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax+2*rax]
- psubsb xmm3, xmm1
- paddsb xmm6, xmm1
+ lea rdx, srct
- pxor xmm3, [t80 GLOBAL]
- pxor xmm6, [t80 GLOBAL]
+ ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+ TRANSPOSE_16X8 0, 1
- movdqa [rdx+48], xmm6
- movdqa [rdx+64], xmm3
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 1
- ; roughly 2/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
- ; s = vp8_signed_char_clamp(qs1 - u);
- ; *oq1 = s^0x80;
- ; s = vp8_signed_char_clamp(ps1 + u);
- ; *op1 = s^0x80;
- pxor xmm1, xmm1
- pxor xmm2, xmm2
+ ; start work on filters
+ B_FILTER 2
- punpcklbw xmm1, xmm4
- punpckhbw xmm2, xmm4
+ ; transpose and write back - only works on q1, q0, p0, p1
+ BV_TRANSPOSE
- pmulhw xmm1, [s18 GLOBAL]
- pmulhw xmm2, [s18 GLOBAL]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
- paddw xmm1, [s63 GLOBAL]
- paddw xmm2, [s63 GLOBAL]
+ ; store 16-line result
+ BV_WRITEBACK xmm1, xmm5
- psraw xmm1, 7
- psraw xmm2, 7
+ mov rsi, arg(0) ; u_ptr
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ BV_WRITEBACK xmm2, xmm6
- packsswb xmm1, xmm2
+ add rsp, 96
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- movdqa xmm3, [rdx + 80] ;/q1
- movdqa xmm6, [rdx + 32] ; p1
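The uv variant earns its throughput by batching the two chroma planes: with its first parameter 0, TRANSPOSE_16X8 switches rsi to arg(5) (the v pointer) after eight rows, so one 16-lane pass filters 8 rows of u and 8 rows of v together. A sketch of the row gathering (hypothetical names):

/* Low eight lanes come from the u plane, high eight from v, so both
   chroma edges are filtered in a single 16-wide batch. */
static void gather_uv_rows(unsigned char *u, unsigned char *v, int stride,
                           const unsigned char *rows[16])
{
    int i;
    for (i = 0; i < 8; i++) {
        rows[i]     = u + i * stride - 4;   /* u plane rows */
        rows[8 + i] = v + i * stride - 4;   /* v plane rows */
    }
}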
+%macro MBV_TRANSPOSE 0
+ movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+ movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
- pxor xmm3, [t80 GLOBAL]
- pxor xmm6, [t80 GLOBAL]
+ punpcklbw xmm0, xmm7 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+ punpckhbw xmm1, xmm7 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
- paddsb xmm6, xmm1
- psubsb xmm3, xmm1
+ movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- pxor xmm6, [t80 GLOBAL]
- pxor xmm3, [t80 GLOBAL]
+ punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+ punpckhbw xmm6, [rdx+48] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
- movdqa [rdx + 80], xmm3
- movdqa [rdx + 32], xmm6
+ movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+ punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+ punpckhwd xmm3, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+ movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
- ; roughly 1/7th difference across boundary
- ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
- ; s = vp8_signed_char_clamp(qs2 - u);
- ; *oq2 = s^0x80;
- ; s = vp8_signed_char_clamp(ps2 + u);
- ; *op2 = s^0x80;
- pxor xmm1, xmm1
- pxor xmm2, xmm2
+ punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+ punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
- punpcklbw xmm1, xmm4
- punpckhbw xmm2, xmm4
+ movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
- pmulhw xmm1, [s9 GLOBAL]
- pmulhw xmm2, [s9 GLOBAL]
+ movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+ punpcklbw xmm6, [rdx+112] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
- paddw xmm1, [s63 GLOBAL]
- paddw xmm2, [s63 GLOBAL]
+ movdqa xmm7, xmm2 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+ punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
- psraw xmm1, 7
- psraw xmm2, 7
+ punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
+ movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
- packsswb xmm1, xmm2
+ punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
+ punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
+%endmacro
- movdqa xmm6, [rdx+16]
- movdqa xmm3, [rdx+96]
+%macro MBV_WRITEBACK_1 0
+ movq QWORD PTR [rsi], xmm0
+ movhps MMWORD PTR [rdi], xmm0
- pxor xmm6, [t80 GLOBAL]
- pxor xmm3, [t80 GLOBAL]
+ movq QWORD PTR [rsi+2*rax], xmm6
+ movhps MMWORD PTR [rdi+2*rax], xmm6
- paddsb xmm6, xmm1
- psubsb xmm3, xmm1
+ movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+ punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
- pxor xmm6, [t80 GLOBAL] ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
- pxor xmm3, [t80 GLOBAL] ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
+ punpckhdq xmm3, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
+ movq QWORD PTR [rsi+4*rax], xmm0
+ movhps MMWORD PTR [rdi+4*rax], xmm0
- ; transpose and write back
- movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
- movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+ movq QWORD PTR [rsi+2*rcx], xmm3
+ movhps MMWORD PTR [rdi+2*rcx], xmm3
- punpcklbw xmm0, xmm6 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
- punpckhbw xmm1, xmm6 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+ movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+ punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
- movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
- movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+ punpckhbw xmm5, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
+ movdqa xmm0, xmm2
- punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
- punpckhbw xmm6, [rdx+48] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+ punpcklwd xmm0, xmm5 ; b7 b6 b5 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
+ punpckhwd xmm2, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
- movdqa xmm5, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
- punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+ movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+ punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 84 83 82 81 80
- punpckhwd xmm5, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
- movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+ punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
+%endmacro
- punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
- punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+%macro MBV_WRITEBACK_2 0
+ movq QWORD PTR [rsi], xmm1
+ movhps MMWORD PTR [rdi], xmm1
- movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+ movq QWORD PTR [rsi+2*rax], xmm5
+ movhps MMWORD PTR [rdi+2*rax], xmm5
- movdqa xmm6, xmm3 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
- punpcklbw xmm6, [rdx+112] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
+ movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+ punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
+ punpckhdq xmm4, xmm2 ; f7 f6 f5 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
- movdqa xmm7, xmm2 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
- punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
+ movq QWORD PTR [rsi+4*rax], xmm1
+ movhps MMWORD PTR [rdi+4*rax], xmm1
- punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
- movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+ movq QWORD PTR [rsi+2*rcx], xmm4
+ movhps MMWORD PTR [rdi+2*rcx], xmm4
+%endmacro
- punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
- punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
- lea rsi, [rsi+rcx*8]
- lea rdi, [rdi+rcx*8]
+;void vp8_mbloop_filter_vertical_edge_sse2
+;(
+; unsigned char *src_ptr,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; int count
+;)
+global sym(vp8_mbloop_filter_vertical_edge_sse2)
+sym(vp8_mbloop_filter_vertical_edge_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- movq QWORD PTR [rsi+rcx*4], xmm0
- psrldq xmm0, 8
+ ALIGN_STACK 16, rax
+ sub rsp, 160 ; reserve 160 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[128];
- movq QWORD PTR [rsi+rcx*2], xmm6
- psrldq xmm6, 8
+ mov rsi, arg(0) ; src_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
- movq QWORD PTR [rdi+rcx*4], xmm0
- movq QWORD PTR [rsi+rcx], xmm6
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax*2+rax]
- movdqa xmm0, xmm5 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
- punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
+ ; Transpose
+ TRANSPOSE_16X8 1, 0
- punpckhdq xmm5, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 0
- movq QWORD PTR [rsi], xmm0
- psrldq xmm0, 8
+ neg rax
+ ; start work on filters
+ MB_FILTER_AND_WRITEBACK 2
- movq QWORD PTR [rsi+rax*2], xmm5
- psrldq xmm5, 8
+ lea rsi, [rsi+rax*8]
+ lea rdi, [rdi+rax*8]
- movq QWORD PTR [rsi+rax], xmm0
- movq QWORD PTR [rdi+rax*2], xmm5
+ ; transpose and write back
+ MBV_TRANSPOSE
- movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
- punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+ neg rax
- punpckhbw xmm3, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
- movdqa xmm0, xmm2
+ MBV_WRITEBACK_1
- punpcklwd xmm0, xmm3 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
- punpckhwd xmm2, xmm3 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
+ lea rsi, [rsi+rax*8]
+ lea rdi, [rdi+rax*8]
+ MBV_WRITEBACK_2
+
+ add rsp, 160
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
- movdqa xmm3, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
- punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
- punpckhdq xmm3, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
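The MB_FILTER_AND_WRITEBACK step invoked above performs the three-tap update that is visible inline in the removed code (the s27/s18/s9 constants with +63 rounding). In scalar form, ignoring the 0x80 bias the asm applies around it, that math is:

/* vp8_signed_char_clamp equivalent. */
static signed char clamp8(int v)
{
    return (signed char)(v < -128 ? -128 : v > 127 ? 127 : v);
}

/* Roughly 3/7, 2/7 and 1/7 of the filter value go to the (q0,p0),
   (q1,p1) and (q2,p2) pairs; the pmulhw by s27/s18/s9 plus s63 and
   psraw 7 compute (63 + f * k) >> 7. */
static void mb_filter_update_ref(int f,
                                 signed char *p2, signed char *p1,
                                 signed char *p0, signed char *q0,
                                 signed char *q1, signed char *q2)
{
    int u;
    u = clamp8((63 + f * 27) >> 7);
    *q0 = clamp8(*q0 - u);
    *p0 = clamp8(*p0 + u);
    u = clamp8((63 + f * 18) >> 7);
    *q1 = clamp8(*q1 - u);
    *p1 = clamp8(*p1 + u);
    u = clamp8((63 + f * 9) >> 7);
    *q2 = clamp8(*q2 - u);
    *p2 = clamp8(*p2 + u);
}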
+;void vp8_mbloop_filter_vertical_edge_uv_sse2
+;(
+; unsigned char *u,
+; int src_pixel_step,
+; const char *flimit,
+; const char *limit,
+; const char *thresh,
+; unsigned char *v
+;)
+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
+sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
- lea rsi, [rsi+rax*8]
- lea rdi, [rdi+rax*8]
+ ALIGN_STACK 16, rax
+ sub rsp, 160 ; reserve 160 bytes
+ %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16];
+ %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16];
+ %define srct [rsp + 32] ;__declspec(align(16)) char srct[128];
- movq QWORD PTR [rsi+rcx*4], xmm1
- psrldq xmm1, 8
+ mov rsi, arg(0) ; u_ptr
+ movsxd rax, dword ptr arg(1) ; src_pixel_step
- movq QWORD PTR [rsi+rcx*2], xmm3
- psrldq xmm3, 8
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing
+ lea rcx, [rax+2*rax]
- movq QWORD PTR [rdi+rcx*4], xmm1
- movq QWORD PTR [rsi+rcx], xmm3
+ lea rdx, srct
- movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
- punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
+ ; Transpose
+ TRANSPOSE_16X8 0, 0
- punpckhdq xmm4, xmm2 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
- movq QWORD PTR [rsi], xmm1
+ ; calculate filter mask and high edge variance
+ LFV_FILTER_MASK_HEV_MASK 0
- psrldq xmm1, 8
+ ; start work on filters
+ MB_FILTER_AND_WRITEBACK 2
- movq QWORD PTR [rsi+rax*2], xmm4
- psrldq xmm4, 8
+ ; transpose and write back
+ MBV_TRANSPOSE
- movq QWORD PTR [rsi+rax], xmm1
- movq QWORD PTR [rdi+rax*2], xmm4
+ mov rsi, arg(0) ;u_ptr
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax]
+ MBV_WRITEBACK_1
+ mov rsi, arg(5) ;v_ptr
+ lea rsi, [rsi - 4]
+ lea rdi, [rsi + rax]
+ MBV_WRITEBACK_2
add rsp, 160
pop rsp
@@ -1563,6 +1366,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1582,6 +1386,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -1610,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
psubusb xmm0, xmm1 ; q1-=p1
psubusb xmm1, xmm4 ; p1-=q1
por xmm1, xmm0 ; abs(p1-q1)
- pand xmm1, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm1, 1 ; abs(p1-q1)/2
movdqu xmm5, [rsi+rax] ; p0
@@ -1628,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pcmpeqb xmm5, xmm3
; start work on filters
- pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
- pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0
paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
@@ -1642,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pand xmm5, xmm2 ; mask filter values we don't care about
; do + 4 side
- paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
@@ -1655,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
por xmm0, xmm1 ; put the two together to get result
psubsb xmm3, xmm0 ; q0-= q0 add
- pxor xmm3, [t80 GLOBAL] ; unoffset
+ pxor xmm3, [GLOBAL(t80)] ; unoffset
movdqu [rsi], xmm3 ; write back
; now do +3 side
- psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
@@ -1671,13 +1476,14 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [t80 GLOBAL] ; unoffset
+ pxor xmm6, [GLOBAL(t80)] ; unoffset
movdqu [rsi+rax], xmm6 ; write back
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
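The +4/+3 rounding pair in the simple filter above corresponds to VP8's scalar update. A sketch (values in the 0x80-offset signed domain, as in the asm; the psllw/psraw pairs emulate a per-byte signed shift):

static signed char clamp8s(int v)
{
    return (signed char)(v < -128 ? -128 : v > 127 ? 127 : v);
}

/* Simple-filter update: the +4-rounded adjustment is subtracted from
   q0, its +3 counterpart is added to p0. */
static void simple_filter_ref(signed char p1, signed char *p0,
                              signed char *q0, signed char q1)
{
    int f = clamp8s((p1 - q1) + 3 * (*q0 - *p0));
    *q0 = clamp8s(*q0 - (clamp8s(f + 4) >> 3));
    *p0 = clamp8s(*p0 + (clamp8s(f + 3) >> 3));
}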
@@ -1697,6 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
@@ -1789,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
psubusb xmm7, xmm0 ; q1-=p1
psubusb xmm6, xmm3 ; p1-=q1
por xmm6, xmm7 ; abs(p1-q1)
- pand xmm6, [tfe GLOBAL] ; set lsb of each byte to zero
+ pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm6, 1 ; abs(p1-q1)/2
movdqa xmm5, xmm1 ; p0
@@ -1815,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
movdqa t0, xmm0
movdqa t1, xmm3
- pxor xmm0, [t80 GLOBAL] ; p1 offset to convert to signed values
- pxor xmm3, [t80 GLOBAL] ; q1 offset to convert to signed values
+ pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
+ pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
psubsb xmm0, xmm3 ; p1 - q1
movdqa xmm6, xmm1 ; p0
movdqa xmm7, xmm2 ; q0
- pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
+ pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm7, [t80 GLOBAL] ; offset to convert to signed values
+ pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
movdqa xmm3, xmm7 ; offseted ; q0
psubsb xmm7, xmm6 ; q0 - p0
@@ -1836,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pand xmm5, xmm0 ; mask filter values we don't care about
- paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
@@ -1851,10 +1658,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
por xmm0, xmm7 ; put the two together to get result
psubsb xmm3, xmm0 ; q0-= q0sz add
- pxor xmm3, [t80 GLOBAL] ; unoffset q0
+ pxor xmm3, [GLOBAL(t80)] ; unoffset q0
; now do +3 side
- psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
movdqa xmm0, xmm5 ; get a copy of filters
psllw xmm0, 8 ; shift left 8
@@ -1867,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
por xmm0, xmm5 ; put the two together to get result
paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [t80 GLOBAL] ; unoffset p0
+ pxor xmm6, [GLOBAL(t80)] ; unoffset p0
movdqa xmm0, t0 ; p1
movdqa xmm4, t1 ; q1
@@ -1941,6 +1748,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1965,12 +1773,6 @@ align 16
ones:
times 8 dw 0x0001
align 16
-s27:
- times 8 dw 0x1b00
-align 16
-s18:
- times 8 dw 0x1200
-align 16
s9:
times 8 dw 0x0900
align 16
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 143ee7469..93107e179 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -33,8 +34,13 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
+extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
+
#if HAVE_MMX
-// Horizontal MB filtering
+/* Horizontal MB filtering */
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -60,7 +66,7 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
}
-// Vertical MB Filtering
+/* Vertical MB Filtering */
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -86,7 +92,7 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
}
-// Horizontal B Filtering
+/* Horizontal B Filtering */
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -116,7 +122,7 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
}
-// Vertical B Filtering
+/* Vertical B Filtering */
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -147,7 +153,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#endif
-// Horizontal MB filtering
+/* Horizontal MB filtering */
#if HAVE_SSE2
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
@@ -156,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
- if (v_ptr)
- vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+ vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
@@ -174,7 +177,7 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
}
-// Vertical MB Filtering
+/* Vertical MB Filtering */
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -182,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
- if (v_ptr)
- vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+ vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
@@ -200,7 +200,7 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
}
-// Horizontal B Filtering
+/* Horizontal B Filtering */
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -210,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
- vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
- if (v_ptr)
- vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+ vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
}
@@ -230,7 +227,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
}
-// Vertical B Filtering
+/* Vertical B Filtering */
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
@@ -240,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
- vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
- if (v_ptr)
- vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+ vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
}
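On the C side, each u call is now paired with its v plane through the new _uv entry points declared above. Judging from the asm prototype comments, the loop_filter_uvfunction type presumably has this shape (reconstructed here, not quoted from the header):

/* Presumed shape of loop_filter_uvfunction: the v-plane pointer takes
   the slot used by 'count' in the y-plane functions. */
typedef void loop_filter_uvfunction(unsigned char *u, int src_pixel_step,
                                    const char *flimit, const char *limit,
                                    const char *thresh, unsigned char *v);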
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index c87f38a31..80dbebc8d 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 721c8d612..787e83268 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -36,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx):
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
; move the global rd onto the stack, since we don't have enough registers
; to do PIC addressing
- movq mm0, [rd GLOBAL]
+ movq mm0, [GLOBAL(rd)]
sub rsp, 8
movq [rsp], mm0
%define RD [rsp]
%else
-%define RD [rd GLOBAL]
+%define RD [GLOBAL(rd)]
%endif
push rbx
- lea rbx, [Blur GLOBAL]
+ lea rbx, [GLOBAL(Blur)]
movd mm2, dword ptr arg(6) ;flimit
punpcklwd mm2, mm2
punpckldq mm2, mm2
@@ -285,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx):
%define flimit2 [rsp+128]
%if ABI_IS_32BIT=0
- lea r8, [sym(vp8_rv) GLOBAL]
+ lea r8, [GLOBAL(sym(vp8_rv))]
%endif
;rows +=8;
@@ -403,7 +404,7 @@ loop_row:
and rcx, 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
push rax
- lea rax, [sym(vp8_rv) GLOBAL]
+ lea rax, [GLOBAL(sym(vp8_rv))]
movq mm4, [rax + rcx*2] ;vp8_rv[rcx*2]
pop rax
%elif ABI_IS_32BIT=0
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
index 095797b1e..6b6321ace 100644
--- a/vp8/common/x86/postproc_mmx.c
+++ b/vp8/common/x86/postproc_mmx.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index bfa36fa70..30b4bf53a 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -25,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -34,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm):
ALIGN_STACK 16, rax
; move the global rd onto the stack, since we don't have enough registers
; to do PIC addressing
- movdqa xmm0, [rd42 GLOBAL]
+ movdqa xmm0, [GLOBAL(rd42)]
sub rsp, 16
movdqa [rsp], xmm0
%define RD42 [rsp]
%else
-%define RD42 [rd42 GLOBAL]
+%define RD42 [GLOBAL(rd42)]
%endif
@@ -239,6 +241,7 @@ acrossnextcol:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -253,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -271,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm):
%define flimit4 [rsp+128]
%if ABI_IS_32BIT=0
- lea r8, [sym(vp8_rv) GLOBAL]
+ lea r8, [GLOBAL(sym(vp8_rv))]
%endif
;rows +=8;
@@ -389,7 +393,7 @@ loop_row:
and rcx, 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
push rax
- lea rax, [sym(vp8_rv) GLOBAL]
+ lea rax, [GLOBAL(sym(vp8_rv))]
movdqu xmm4, [rax + rcx*2] ;vp8_rv[rcx*2]
pop rax
%elif ABI_IS_32BIT=0
@@ -438,6 +442,7 @@ loop_row:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -451,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -573,7 +579,7 @@ nextcol4:
punpcklwd xmm1, xmm0
paddd xmm1, xmm6
- paddd xmm1, [four8s GLOBAL]
+ paddd xmm1, [GLOBAL(four8s)]
psrad xmm1, 4
packssdw xmm1, xmm0
@@ -611,6 +617,7 @@ nextcol4:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h
index 49a190793..899dd2f89 100644
--- a/vp8/common/x86/postproc_x86.h
+++ b/vp8/common/x86/postproc_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index ba60c5db7..e7211fccb 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index f2685a76f..4ad3973ec 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -66,6 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM
push rsi
push rdi
; end prolog
@@ -118,6 +120,7 @@ sym(vp8_recon4b_sse2):
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index c46977842..40ee65a12 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index c50211813..23ed4e208 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -83,7 +84,7 @@ nextrow:
pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers
paddsw mm3, mm5 ; mm3 += mm5
- paddsw mm3, [rd GLOBAL] ; mm3 += round value
+ paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
packuswb mm3, mm0 ; pack and unpack to saturate
punpcklbw mm3, mm0 ;
@@ -135,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx):
push rdi
; end prolog
- movq mm5, [rd GLOBAL]
+ movq mm5, [GLOBAL(rd)]
push rbx
mov rbx, arg(6) ;vp8_filter
movq mm1, [rbx + 16] ; do both the negative taps first!!!
@@ -224,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx):
push rdi
; end prolog
- movq mm5, [rd GLOBAL]
+ movq mm5, [GLOBAL(rd)]
push rbx
mov rbx, arg(7) ;vp8_filter
movq mm1, [rbx + 16] ; do both the negative taps first!!!
@@ -319,7 +320,7 @@ sym(vp8_bilinear_predict8x8_mmx):
mov rdi, arg(4) ;dst_ptr ;
shl rax, 5 ; offset * 32
- lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
add rax, rcx ; HFilter
mov rsi, arg(0) ;src_ptr ;
@@ -362,10 +363,10 @@ sym(vp8_bilinear_predict8x8_mmx):
paddw mm3, mm5 ;
paddw mm4, mm6 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
movq mm7, mm3 ;
@@ -403,10 +404,10 @@ next_row_8x8:
pmullw mm5, [rax] ;
pmullw mm6, [rax] ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
movq mm7, mm3 ;
@@ -420,10 +421,10 @@ next_row_8x8:
paddw mm4, mm6 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
packuswb mm3, mm4
@@ -475,7 +476,7 @@ sym(vp8_bilinear_predict8x4_mmx):
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
shl rax, 5
mov rsi, arg(0) ;src_ptr ;
@@ -517,10 +518,10 @@ sym(vp8_bilinear_predict8x4_mmx):
paddw mm3, mm5 ;
paddw mm4, mm6 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
movq mm7, mm3 ;
@@ -558,10 +559,10 @@ next_row_8x4:
pmullw mm5, [rax] ;
pmullw mm6, [rax] ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
movq mm7, mm3 ;
@@ -575,10 +576,10 @@ next_row_8x4:
paddw mm4, mm6 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw mm4, [rd GLOBAL] ;
+ paddw mm4, [GLOBAL(rd)] ;
psraw mm4, VP8_FILTER_SHIFT ;
packuswb mm3, mm4
@@ -630,7 +631,7 @@ sym(vp8_bilinear_predict4x4_mmx):
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
- lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
shl rax, 5
add rax, rcx ; HFilter
@@ -661,7 +662,7 @@ sym(vp8_bilinear_predict4x4_mmx):
pmullw mm5, mm2 ;
paddw mm3, mm5 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
@@ -685,7 +686,7 @@ next_row_4x4:
punpcklbw mm5, mm0 ;
pmullw mm5, [rax] ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
movq mm7, mm3 ;
@@ -696,7 +697,7 @@ next_row_4x4:
paddw mm3, mm5 ;
- paddw mm3, [rd GLOBAL] ; xmm3 += round value
+ paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
packuswb mm3, mm0
@@ -730,7 +731,7 @@ rd:
times 4 dw 0x40
align 16
-global sym(vp8_six_tap_mmx)
+global HIDDEN_DATA(sym(vp8_six_tap_mmx))
sym(vp8_six_tap_mmx):
times 8 dw 0
times 8 dw 0
@@ -790,7 +791,7 @@ sym(vp8_six_tap_mmx):
align 16
-global sym(vp8_bilinear_filters_mmx)
+global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
sym(vp8_bilinear_filters_mmx):
times 8 dw 128
times 8 dw 0
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index dee04f2d9..b87cad259 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -36,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -105,7 +107,7 @@ filter_block1d8_h6_rowloop:
paddsw xmm4, xmm6
paddsw xmm4, xmm1
- paddsw xmm4, [rd GLOBAL]
+ paddsw xmm4, [GLOBAL(rd)]
psraw xmm4, 7
@@ -128,6 +130,7 @@ filter_block1d8_h6_rowloop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -154,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -227,7 +231,7 @@ filter_block1d16_h6_sse2_rowloop:
paddsw xmm4, xmm6
paddsw xmm4, xmm1
- paddsw xmm4, [rd GLOBAL]
+ paddsw xmm4, [GLOBAL(rd)]
psraw xmm4, 7
@@ -280,7 +284,7 @@ filter_block1d16_h6_sse2_rowloop:
paddsw xmm4, xmm6
paddsw xmm4, xmm2
- paddsw xmm4, [rd GLOBAL]
+ paddsw xmm4, [GLOBAL(rd)]
psraw xmm4, 7
@@ -303,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -328,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -345,7 +351,7 @@ sym(vp8_filter_block1d8_v6_sse2):
movsxd rcx, DWORD PTR arg(5) ;[output_height]
pxor xmm0, xmm0 ; clear xmm0
- movdqa xmm7, XMMWORD PTR [rd GLOBAL]
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(2) ; dst_ptich
%endif
@@ -396,221 +402,553 @@ vp8_filter_block1d8_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_unpack_block1d16_h6_sse2
+;void vp8_filter_block1d16_v6_sse2
+;(
+; unsigned short *src_ptr,
+; unsigned char *output_ptr,
+; int dst_ptich,
+; unsigned int pixels_per_line,
+; unsigned int pixel_step,
+; unsigned int output_height,
+; unsigned int output_width,
+; const short *vp8_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
+; input pixel array has output_height rows.
+;*************************************************************************************/
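+;
+; For reference, a hedged scalar sketch of what each output pixel computes
+; (the names below are illustrative, not part of the library API):
+;
+;   /* 6-tap vertical filter over 16-bit first-pass data; pitch is the row
+;      stride of src in elements, taps are the six filter coefficients */
+;   unsigned char vfilter6_px(const short *src, int pitch, const short *taps)
+;   {
+;       int i, sum = 0;
+;       for (i = 0; i < 6; i++)
+;           sum += src[i * pitch] * taps[i];
+;       sum = (sum + 64) >> 7;       /* add rd, shift by VP8_FILTER_SHIFT */
+;       if (sum < 0)   sum = 0;      /* clamp as packuswb saturation does */
+;       if (sum > 255) sum = 255;
+;       return (unsigned char)sum;
+;   }
+;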
+global sym(vp8_filter_block1d16_v6_sse2)
+sym(vp8_filter_block1d16_v6_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rax, arg(7) ;vp8_filter
+ movsxd rdx, dword ptr arg(3) ;pixels_per_line
+
+ mov rdi, arg(1) ;output_ptr
+ mov rsi, arg(0) ;src_ptr
+
+ sub rsi, rdx
+ sub rsi, rdx
+
+ movsxd rcx, DWORD PTR arg(5) ;[output_height]
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(2) ; dst_ptich
+%endif
+
+vp8_filter_block1d16_v6_sse2_loop:
+; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
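+; (summing in this order presumably keeps the saturating paddsw partial sums
+; balanced between positive and negative taps)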
+ movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2
+ movdqa xmm2, XMMWORD PTR [rsi + rdx + 16]
+ pmullw xmm1, [rax + 16]
+ pmullw xmm2, [rax + 16]
+
+ movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5
+ movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16]
+ pmullw xmm3, [rax + 64]
+ pmullw xmm4, [rax + 64]
+
+ movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3
+ movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16]
+ pmullw xmm5, [rax + 32]
+ pmullw xmm6, [rax + 32]
+
+ movdqa xmm7, XMMWORD PTR [rsi] ; line 1
+ movdqa xmm0, XMMWORD PTR [rsi + 16]
+ pmullw xmm7, [rax]
+ pmullw xmm0, [rax]
+
+ paddsw xmm1, xmm3
+ paddsw xmm2, xmm4
+ paddsw xmm1, xmm5
+ paddsw xmm2, xmm6
+ paddsw xmm1, xmm7
+ paddsw xmm2, xmm0
+
+ add rsi, rdx
+
+ movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4
+ movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16]
+ pmullw xmm3, [rax + 48]
+ pmullw xmm4, [rax + 48]
+
+ movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6
+ movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16]
+ pmullw xmm5, [rax + 80]
+ pmullw xmm6, [rax + 80]
+
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
+ pxor xmm0, xmm0 ; clear xmm0
+
+ paddsw xmm1, xmm3
+ paddsw xmm2, xmm4
+ paddsw xmm1, xmm5
+ paddsw xmm2, xmm6
+
+ paddsw xmm1, xmm7
+ paddsw xmm2, xmm7
+
+ psraw xmm1, 7
+ psraw xmm2, 7
+
+ packuswb xmm1, xmm2 ; pack and saturate
+ movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(2) ;[dst_ptich]
+%else
+ add rdi, r8
+%endif
+ dec rcx ; decrement count
+ jnz vp8_filter_block1d16_v6_sse2_loop ; next row
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_filter_block1d8_h6_only_sse2
;(
; unsigned char *src_ptr,
-; unsigned short *output_ptr,
; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; int dst_ptich,
; unsigned int output_height,
-; unsigned int output_width
+; const short *vp8_filter
;)
-global sym(vp8_unpack_block1d16_h6_sse2)
-sym(vp8_unpack_block1d16_h6_sse2):
+; First-pass filter only when yoffset==0
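+; (with yoffset==0 the second, vertical pass would be an identity copy, so
+; the horizontal 6-tap writes rounded, clamped bytes straight to dst)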
+global sym(vp8_filter_block1d8_h6_only_sse2)
+sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
+ mov rdx, arg(5) ;vp8_filter
mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;output_ptr
- movsxd rcx, dword ptr arg(3) ;output_height
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+ mov rdi, arg(2) ;output_ptr
- pxor xmm0, xmm0 ; clear xmm0 for unpack
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source
+ movsxd r8, dword ptr arg(3) ;dst_ptich
%endif
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
-unpack_block1d16_h6_sse2_rowloop:
- movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
- movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1
+filter_block1d8_h6_only_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2]
+ movq xmm1, MMWORD PTR [rsi + 6]
+
+ prefetcht2 [rsi+rax-2]
+
+ pslldq xmm1, 8
+ por xmm1, xmm3
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
    punpcklbw xmm3, xmm0            ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
- punpcklbw xmm1, xmm0
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
- movdqa XMMWORD Ptr [rdi], xmm1
- movdqa XMMWORD Ptr [rdi + 16], xmm3
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0
+
+ movq QWORD PTR [rdi], xmm4 ; store the results in the destination
lea rsi, [rsi + rax]
+
%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(4) ;[output_width]
+ add rdi, DWORD Ptr arg(3) ;dst_ptich
%else
add rdi, r8
%endif
dec rcx
- jnz unpack_block1d16_h6_sse2_rowloop ; next row
+
+ jnz filter_block1d8_h6_only_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_unpack_block1d8_h6_sse2
+;void vp8_filter_block1d16_h6_only_sse2
;(
; unsigned char *src_ptr,
-; unsigned short *output_ptr,
; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; int dst_ptich,
; unsigned int output_height,
-; unsigned int output_width
+; const short *vp8_filter
;)
-global sym(vp8_unpack_block1d8_h6_sse2)
-sym(vp8_unpack_block1d8_h6_sse2):
+; First-pass filter only when yoffset==0
+global sym(vp8_filter_block1d16_h6_only_sse2)
+sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
+ mov rdx, arg(5) ;vp8_filter
mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;output_ptr
- movsxd rcx, dword ptr arg(3) ;output_height
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+ mov rdi, arg(2) ;output_ptr
- pxor xmm0, xmm0 ; clear xmm0 for unpack
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source
+ movsxd r8, dword ptr arg(3) ;dst_ptich
%endif
-unpack_block1d8_h6_sse2_rowloop:
- movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
- lea rsi, [rsi + rax]
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
- punpcklbw xmm1, xmm0
- movdqa XMMWORD Ptr [rdi], xmm1
+filter_block1d16_h6_only_sse2_rowloop:
+ movq xmm3, MMWORD PTR [rsi - 2]
+ movq xmm1, MMWORD PTR [rsi + 6]
+
+ movq xmm2, MMWORD PTR [rsi +14]
+ pslldq xmm2, 8
+
+ por xmm2, xmm1
+ prefetcht2 [rsi+rax-2]
+
+ pslldq xmm1, 8
+ por xmm1, xmm3
+
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm1
+
+    punpcklbw xmm3, xmm0            ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm1
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+ packuswb xmm4, xmm0 ; lower 8 bytes
+
+ movq QWORD Ptr [rdi], xmm4 ; store the results in the destination
+
+ movdqa xmm3, xmm2
+ movdqa xmm4, xmm2
+
+ movdqa xmm5, xmm2
+ movdqa xmm6, xmm2
+
+ movdqa xmm7, xmm2
+
+    punpcklbw xmm3, xmm0            ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+ pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
+ punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+ psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+ pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
+
+ punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+ psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+ pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
+
+ punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+ psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+ pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
+
+ punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+ psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+ pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
+
+ punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+ pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
+
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm5
+
+ paddsw xmm4, xmm3
+ paddsw xmm4, xmm6
+
+ paddsw xmm4, xmm2
+ paddsw xmm4, [GLOBAL(rd)]
+
+ psraw xmm4, 7
+
+ packuswb xmm4, xmm0 ; higher 8 bytes
+
+ movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination
+
+ lea rsi, [rsi + rax]
%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(4) ;[output_width]
+ add rdi, DWORD Ptr arg(3) ;dst_ptich
%else
add rdi, r8
%endif
+
dec rcx
- jnz unpack_block1d8_h6_sse2_rowloop ; next row
+ jnz filter_block1d16_h6_only_sse2_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_pack_block1d8_v6_sse2
+;void vp8_filter_block1d8_v6_only_sse2
;(
-; short *src_ptr,
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; int dst_ptich,
-; unsigned int pixels_per_line,
; unsigned int output_height,
-; unsigned int output_width
+; const short *vp8_filter
;)
-global sym(vp8_pack_block1d8_v6_sse2)
-sym(vp8_pack_block1d8_v6_sse2):
+; Second-pass filter only when xoffset==0
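+; (with xoffset==0 the first, horizontal pass is skipped, so this variant
+; reads 8-bit source pixels directly instead of 16-bit first-pass data)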
+global sym(vp8_filter_block1d8_v6_only_sse2)
+sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
- movsxd rdx, dword ptr arg(3) ;pixels_per_line
- mov rdi, arg(1) ;output_ptr
-
mov rsi, arg(0) ;src_ptr
- movsxd rcx, DWORD PTR arg(4) ;[output_height]
+ mov rdi, arg(2) ;output_ptr
+
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+ mov rax, arg(5) ;vp8_filter
+
+ pxor xmm0, xmm0 ; clear xmm0
+
+ movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(5) ;output_width ; Pitch for Source
+ movsxd r8, dword ptr arg(3) ; dst_ptich
%endif
-pack_block1d8_v6_sse2_loop:
- movdqa xmm0, XMMWORD PTR [rsi]
- packuswb xmm0, xmm0
+vp8_filter_block1d8_v6_only_sse2_loop:
+ movq xmm1, MMWORD PTR [rsi]
+ movq xmm2, MMWORD PTR [rsi + rdx]
+ movq xmm3, MMWORD PTR [rsi + rdx * 2]
+ movq xmm5, MMWORD PTR [rsi + rdx * 4]
+ add rsi, rdx
+ movq xmm4, MMWORD PTR [rsi + rdx * 2]
+ movq xmm6, MMWORD PTR [rsi + rdx * 4]
+
+ punpcklbw xmm1, xmm0
+ pmullw xmm1, [rax]
+
+ punpcklbw xmm2, xmm0
+ pmullw xmm2, [rax + 16]
+
+ punpcklbw xmm3, xmm0
+ pmullw xmm3, [rax + 32]
+
+ punpcklbw xmm5, xmm0
+ pmullw xmm5, [rax + 64]
+
+ punpcklbw xmm4, xmm0
+ pmullw xmm4, [rax + 48]
+
+ punpcklbw xmm6, xmm0
+ pmullw xmm6, [rax + 80]
- movq QWORD PTR [rdi], xmm0 ; store the results in the destination
- lea rsi, [rsi+rdx]
+ paddsw xmm2, xmm5
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, xmm4
+
+ paddsw xmm2, xmm6
+ paddsw xmm2, xmm7
+
+ psraw xmm2, 7
+ packuswb xmm2, xmm0 ; pack and saturate
+
+ movq QWORD PTR [rdi], xmm2 ; store the results in the destination
%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(5) ;[output_width]
+ add rdi, DWORD PTR arg(3) ;[dst_ptich]
%else
add rdi, r8
%endif
dec rcx ; decrement count
- jnz pack_block1d8_v6_sse2_loop ; next row
+ jnz vp8_filter_block1d8_v6_only_sse2_loop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-;void vp8_pack_block1d16_v6_sse2
+;void vp8_unpack_block1d16_h6_sse2
;(
-; short *src_ptr,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int pixels_per_line,
-; unsigned int output_height,
-; unsigned int output_width
+; unsigned char *src_ptr,
+; unsigned short *output_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned int output_height,
+; unsigned int output_width
;)
-global sym(vp8_pack_block1d16_v6_sse2)
-sym(vp8_pack_block1d16_v6_sse2):
+global sym(vp8_unpack_block1d16_h6_sse2)
+sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 5
+ ;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx
push rsi
push rdi
; end prolog
- movsxd rdx, dword ptr arg(3) ;pixels_per_line
+ mov rsi, arg(0) ;src_ptr
mov rdi, arg(1) ;output_ptr
- mov rsi, arg(0) ;src_ptr
- movsxd rcx, DWORD PTR arg(4) ;[output_height]
+ movsxd rcx, dword ptr arg(3) ;output_height
+ movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(2) ;dst_pitch
+ movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source
%endif
-pack_block1d16_v6_sse2_loop:
- movdqa xmm0, XMMWORD PTR [rsi]
- movdqa xmm1, XMMWORD PTR [rsi+16]
+unpack_block1d16_h6_sse2_rowloop:
+ movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
+ movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1
- packuswb xmm0, xmm1
- movdqa XMMWORD PTR [rdi], xmm0 ; store the results in the destination
+    punpcklbw xmm3, xmm0            ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
+ punpcklbw xmm1, xmm0
- add rsi, rdx
+ movdqa XMMWORD Ptr [rdi], xmm1
+ movdqa XMMWORD Ptr [rdi + 16], xmm3
+
+ lea rsi, [rsi + rax]
%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(2) ;dst_pitch
+ add rdi, DWORD Ptr arg(4) ;[output_width]
%else
add rdi, r8
%endif
- dec rcx ; decrement count
- jnz pack_block1d16_v6_sse2_loop ; next row
+ dec rcx
+ jnz unpack_block1d16_h6_sse2_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
+ ;RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -631,6 +969,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -639,7 +978,7 @@ sym(vp8_bilinear_predict16x16_sse2):
;const short *HFilter = bilinear_filters_mmx[xoffset]
;const short *VFilter = bilinear_filters_mmx[yoffset]
- lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
movsxd rax, dword ptr arg(2) ;xoffset
cmp rax, 0 ;skip first_pass filter if xoffset=0
@@ -694,10 +1033,10 @@ sym(vp8_bilinear_predict16x16_sse2):
paddw xmm3, xmm5
paddw xmm4, xmm6
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw xmm4, [rd GLOBAL]
+ paddw xmm4, [GLOBAL(rd)]
psraw xmm4, VP8_FILTER_SHIFT
movdqa xmm7, xmm3
@@ -735,10 +1074,10 @@ next_row:
pmullw xmm5, [rax]
pmullw xmm6, [rax]
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw xmm4, [rd GLOBAL]
+ paddw xmm4, [GLOBAL(rd)]
psraw xmm4, VP8_FILTER_SHIFT
movdqa xmm7, xmm3
@@ -750,10 +1089,10 @@ next_row:
paddw xmm3, xmm5
paddw xmm4, xmm6
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw xmm4, [rd GLOBAL]
+ paddw xmm4, [GLOBAL(rd)]
psraw xmm4, VP8_FILTER_SHIFT
packuswb xmm3, xmm4
@@ -815,10 +1154,10 @@ next_row_spo:
paddw xmm3, xmm5
paddw xmm4, xmm6
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw xmm4, [rd GLOBAL]
+ paddw xmm4, [GLOBAL(rd)]
psraw xmm4, VP8_FILTER_SHIFT
packuswb xmm3, xmm4
@@ -859,10 +1198,10 @@ next_row_fpo:
paddw xmm3, xmm5
paddw xmm4, xmm6
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
- paddw xmm4, [rd GLOBAL]
+ paddw xmm4, [GLOBAL(rd)]
psraw xmm4, VP8_FILTER_SHIFT
packuswb xmm3, xmm4
@@ -878,6 +1217,7 @@ done:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -898,6 +1238,7 @@ sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -908,7 +1249,7 @@ sym(vp8_bilinear_predict8x8_sse2):
;const short *HFilter = bilinear_filters_mmx[xoffset]
;const short *VFilter = bilinear_filters_mmx[yoffset]
- lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL]
+ lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
mov rsi, arg(0) ;src_ptr
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
@@ -974,7 +1315,7 @@ sym(vp8_bilinear_predict8x8_sse2):
paddw xmm3, xmm4
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
movdqa xmm7, xmm3
@@ -993,7 +1334,7 @@ next_row8x8:
paddw xmm3, xmm4
pmullw xmm7, xmm5
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
movdqa xmm4, xmm3
@@ -1003,7 +1344,7 @@ next_row8x8:
movdqa xmm7, xmm4
- paddw xmm3, [rd GLOBAL] ; xmm3 += round value
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
packuswb xmm3, xmm0
@@ -1021,6 +1362,7 @@ next_row8x8:
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
new file mode 100644
index 000000000..7f6fd93e4
--- /dev/null
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -0,0 +1,1554 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP8_FILTER_WEIGHT 128
+%define VP8_FILTER_SHIFT 7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in the horizontal direction,
+; calculating ONE row per iteration to take advantage of 128-bit operations.
+;
+; This is an implementation of some of the SSE optimizations first seen in ffvp8
+;
+;*************************************************************************************/
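+;
+; A hedged scalar model of the pmaddubsw pairing used throughout this file
+; (illustrative names only): the byte shuffles interleave pixel pairs so that
+; one pmaddubsw lane computes p0*k0 + p1*k1 with unsigned pixels and signed
+; byte coefficients, saturating to a signed word.
+;
+;   int madd_pair(unsigned char p0, unsigned char p1,
+;                 signed char k0, signed char k1)
+;   {
+;       int sum = p0 * k0 + p1 * k1;        /* one pmaddubsw lane */
+;       if (sum >  32767) sum =  32767;     /* signed-word saturation */
+;       if (sum < -32768) sum = -32768;
+;       return sum;
+;   }
+;
+;   /* a full 6-tap output sums three such pairs, adds the rd rounding
+;      constant, shifts right by VP8_FILTER_SHIFT and clamps to a byte */
+;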
+;void vp8_filter_block1d8_h6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_h6_ssse3)
+sym(vp8_filter_block1d8_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4
+
+ movdqa xmm7, [GLOBAL(rd)]
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ mov rdi, arg(2) ;output_ptr
+
+ cmp esi, DWORD PTR [rax]
+ je vp8_filter_block1d8_h4_ssse3
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+ sub rdi, rdx
+;xmm3 free
+filter_block1d8_h6_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm1, xmm0
+ pmaddubsw xmm0, xmm4
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)]
+
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)]
+ pmaddubsw xmm1, xmm5
+
+ lea rdi, [rdi + rdx]
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+ dec rcx
+
+ paddsw xmm0, xmm1
+ paddsw xmm2, xmm7
+
+ paddsw xmm0, xmm2
+
+ psraw xmm0, 7
+
+ packuswb xmm0, xmm0
+
+ movq MMWORD Ptr [rdi], xmm0
+ jnz filter_block1d8_h6_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d8_h4_ssse3:
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
+ movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]
+
+ mov rsi, arg(0) ;src_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+ sub rdi, rdx
+
+filter_block1d8_h4_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm2, xmm0
+ pshufb xmm0, xmm3
+
+ pshufb xmm2, xmm4
+ pmaddubsw xmm0, xmm5
+
+ lea rdi, [rdi + rdx]
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+ dec rcx
+
+ paddsw xmm0, xmm7
+
+ paddsw xmm0, xmm2
+
+ psraw xmm0, 7
+
+ packuswb xmm0, xmm0
+
+ movq MMWORD Ptr [rdi], xmm0
+
+ jnz filter_block1d8_h4_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+;void vp8_filter_block1d16_h6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_h6_ssse3)
+sym(vp8_filter_block1d16_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4 ;
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+
+ mov rdi, arg(2) ;output_ptr
+
+;;
+;; cmp esi, DWORD PTR [rax]
+;; je vp8_filter_block1d16_h4_ssse3
+
+ mov rsi, arg(0) ;src_ptr
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+filter_block1d16_h6_rowloop_ssse3:
+ movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
+
+ movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
+
+ punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
+
+ movdqa xmm1, xmm0
+ pmaddubsw xmm0, xmm4
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)]
+
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)]
+ movq xmm3, MMWORD PTR [rsi + 6]
+
+ pmaddubsw xmm1, xmm5
+ movq xmm7, MMWORD PTR [rsi + 11]
+
+ pmaddubsw xmm2, xmm6
+ punpcklbw xmm3, xmm7
+
+ paddsw xmm0, xmm1
+ movdqa xmm1, xmm3
+
+ pmaddubsw xmm3, xmm4
+ paddsw xmm0, xmm2
+
+ movdqa xmm2, xmm1
+ paddsw xmm0, [GLOBAL(rd)]
+
+ pshufb xmm1, [GLOBAL(shuf2bfrom1)]
+ pshufb xmm2, [GLOBAL(shuf3bfrom1)]
+
+ psraw xmm0, 7
+ pmaddubsw xmm1, xmm5
+
+ pmaddubsw xmm2, xmm6
+ packuswb xmm0, xmm0
+
+ lea rsi, [rsi + rax]
+ paddsw xmm3, xmm1
+
+ paddsw xmm3, xmm2
+
+ paddsw xmm3, [GLOBAL(rd)]
+
+ psraw xmm3, 7
+
+ packuswb xmm3, xmm3
+
+ punpcklqdq xmm0, xmm3
+
+ movdqa XMMWORD Ptr [rdi], xmm0
+
+ lea rdi, [rdi + rdx]
+ dec rcx
+ jnz filter_block1d16_h6_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d16_h4_ssse3:
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+filter_block1d16_h4_rowloop_ssse3:
+ movdqu xmm1, XMMWORD PTR [rsi - 2]
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2b)]
+ pshufb xmm2, [GLOBAL(shuf3b)]
+ pmaddubsw xmm1, xmm5
+
+ movdqu xmm3, XMMWORD PTR [rsi + 6]
+
+ pmaddubsw xmm2, xmm6
+ movdqa xmm0, xmm3
+ pshufb xmm3, [GLOBAL(shuf3b)]
+ pshufb xmm0, [GLOBAL(shuf2b)]
+
+ paddsw xmm1, [GLOBAL(rd)]
+ paddsw xmm1, xmm2
+
+ pmaddubsw xmm0, xmm5
+ pmaddubsw xmm3, xmm6
+
+ psraw xmm1, 7
+ packuswb xmm1, xmm1
+ lea rsi, [rsi + rax]
+ paddsw xmm3, xmm0
+ paddsw xmm3, [GLOBAL(rd)]
+ psraw xmm3, 7
+ packuswb xmm3, xmm3
+
+ punpcklqdq xmm1, xmm3
+
+ movdqa XMMWORD Ptr [rdi], xmm1
+
+ add rdi, rdx
+ dec rcx
+ jnz filter_block1d16_h4_rowloop_ssse3
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_filter_block1d4_h6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_h6_ssse3)
+sym(vp8_filter_block1d4_h6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4 ;
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+ movdqa xmm7, [GLOBAL(rd)]
+
+ cmp esi, DWORD PTR [rax]
+ je vp8_filter_block1d4_h4_ssse3
+
+ movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+;xmm3 free
+filter_block1d4_h6_rowloop_ssse3:
+ movdqu xmm0, XMMWORD PTR [rsi - 2]
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf1b)]
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf2b)]
+ pmaddubsw xmm0, xmm4
+ pshufb xmm2, [GLOBAL(shuf3b)]
+ pmaddubsw xmm1, xmm5
+
+;--
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+;--
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm7
+ pxor xmm1, xmm1
+ paddsw xmm0, xmm2
+ psraw xmm0, 7
+ packuswb xmm0, xmm0
+
+ movd DWORD PTR [rdi], xmm0
+
+ add rdi, rdx
+ dec rcx
+ jnz filter_block1d4_h6_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d4_h4_ssse3:
+ movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
+ movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
+ movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)]
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+
+filter_block1d4_h4_rowloop_ssse3:
+ movdqu xmm1, XMMWORD PTR [rsi - 2]
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)]
+ pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)]
+ pmaddubsw xmm1, xmm5
+
+;--
+ pmaddubsw xmm2, xmm6
+
+ lea rsi, [rsi + rax]
+;--
+ paddsw xmm1, xmm7
+ paddsw xmm1, xmm2
+ psraw xmm1, 7
+ packuswb xmm1, xmm1
+
+ movd DWORD PTR [rdi], xmm1
+
+ add rdi, rdx
+ dec rcx
+ jnz filter_block1d4_h4_rowloop_ssse3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+;void vp8_filter_block1d16_v6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_v6_ssse3)
+sym(vp8_filter_block1d16_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4 ;
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+
+ cmp esi, DWORD PTR [rax]
+ je vp8_filter_block1d16_v4_ssse3
+
+ movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx
+
+
+vp8_filter_block1d16_v6_ssse3_loop:
+ movq xmm1, MMWORD PTR [rsi] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
+
+ pmaddubsw xmm3, xmm6
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm5
+
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, [GLOBAL(rd)]
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2 ;store the results
+
+ movq xmm1, MMWORD PTR [rsi + 8] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F
+ pmaddubsw xmm3, xmm6
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm5
+
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, [GLOBAL(rd)]
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi+8], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d16_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d16_v4_ssse3:
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx
+
+vp8_filter_block1d16_v4_ssse3_loop:
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ pmaddubsw xmm3, xmm6
+ pmaddubsw xmm2, xmm7
+ movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B
+ movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
+
+ paddsw xmm2, [GLOBAL(rd)]
+ paddsw xmm2, xmm3
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ punpcklbw xmm5, xmm4 ;B D
+ punpcklbw xmm1, xmm0 ;C E
+
+ pmaddubsw xmm1, xmm6
+ pmaddubsw xmm5, xmm7
+
+ movdqa xmm4, [GLOBAL(rd)]
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm5, xmm1
+ paddsw xmm5, xmm4
+ psraw xmm5, 7
+ packuswb xmm5, xmm5
+
+ punpcklqdq xmm2, xmm5
+
+ movdqa XMMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d16_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_filter_block1d8_v6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_v6_ssse3)
+sym(vp8_filter_block1d8_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4 ;
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ; out_pitch
+%endif
+ movsxd rcx, DWORD PTR arg(4) ;[output_height]
+
+ cmp esi, DWORD PTR [rax]
+ je vp8_filter_block1d8_v4_ssse3
+
+ movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+vp8_filter_block1d8_v6_ssse3_loop:
+ movq xmm1, MMWORD PTR [rsi] ;A
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
+ movdqa xmm4, [GLOBAL(rd)]
+
+ pmaddubsw xmm3, xmm6
+ punpcklbw xmm1, xmm0 ;A F
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm5
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm1
+ paddsw xmm2, xmm4
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d8_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d8_v4_ssse3:
+ movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
+ movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
+ movdqa xmm5, [GLOBAL(rd)]
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+vp8_filter_block1d8_v4_ssse3_loop:
+ movq xmm2, MMWORD PTR [rsi + rdx] ;B
+ movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
+ movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
+ movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw xmm2, xmm4 ;B D
+ punpcklbw xmm3, xmm0 ;C E
+
+ pmaddubsw xmm3, xmm6
+ pmaddubsw xmm2, xmm7
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw xmm2, xmm3
+ paddsw xmm2, xmm5
+ psraw xmm2, 7
+ packuswb xmm2, xmm2
+
+ movq MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d8_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+;void vp8_filter_block1d4_v6_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; unsigned int vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_v6_ssse3)
+sym(vp8_filter_block1d4_v6_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movsxd rdx, DWORD PTR arg(5) ;table index
+ xor rsi, rsi
+ shl rdx, 4 ;
+
+ lea rax, [GLOBAL(k0_k5)]
+ add rax, rdx
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+ mov rdi, arg(2) ;output_ptr
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ; out_pitch
+%endif
+ movsxd rcx, DWORD PTR arg(4) ;[output_height]
+
+ cmp esi, DWORD PTR [rax]
+ je vp8_filter_block1d4_v4_ssse3
+
+ movq mm5, MMWORD PTR [rax] ;k0_k5
+ movq mm6, MMWORD PTR [rax+256] ;k2_k4
+ movq mm7, MMWORD PTR [rax+128] ;k1_k3
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+vp8_filter_block1d4_v6_ssse3_loop:
+ movd mm1, DWORD PTR [rsi] ;A
+ movd mm2, DWORD PTR [rsi + rdx] ;B
+ movd mm3, DWORD PTR [rsi + rdx * 2] ;C
+ movd mm4, DWORD PTR [rax + rdx * 2] ;D
+ movd mm0, DWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw mm2, mm4 ;B D
+ punpcklbw mm3, mm0 ;C E
+
+ movd mm0, DWORD PTR [rax + rdx * 4] ;F
+
+ movq mm4, [GLOBAL(rd)]
+
+ pmaddubsw mm3, mm6
+ punpcklbw mm1, mm0 ;A F
+ pmaddubsw mm2, mm7
+ pmaddubsw mm1, mm5
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw mm2, mm3
+ paddsw mm2, mm1
+ paddsw mm2, mm4
+ psraw mm2, 7
+ packuswb mm2, mm2
+
+ movd DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d4_v6_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+vp8_filter_block1d4_v4_ssse3:
+ movq mm6, MMWORD PTR [rax+256] ;k2_k4
+ movq mm7, MMWORD PTR [rax+128] ;k1_k3
+ movq mm5, MMWORD PTR [GLOBAL(rd)]
+
+ mov rsi, arg(0) ;src_ptr
+
+ mov rax, rsi
+ add rax, rdx
+
+vp8_filter_block1d4_v4_ssse3_loop:
+ movd mm2, DWORD PTR [rsi + rdx] ;B
+ movd mm3, DWORD PTR [rsi + rdx * 2] ;C
+ movd mm4, DWORD PTR [rax + rdx * 2] ;D
+ movd mm0, DWORD PTR [rsi + rdx * 4] ;E
+
+ punpcklbw mm2, mm4 ;B D
+ punpcklbw mm3, mm0 ;C E
+
+ pmaddubsw mm3, mm6
+ pmaddubsw mm2, mm7
+ add rsi, rdx
+ add rax, rdx
+;--
+;--
+ paddsw mm2, mm3
+ paddsw mm2, mm5
+ psraw mm2, 7
+ packuswb mm2, mm2
+
+ movd DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;[out_pitch]
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz vp8_filter_block1d4_v4_ssse3_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
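+; A hedged scalar model of the bilinear predictors below (illustrative names
+; only): each pass blends two neighbouring pixels with one 2-byte row of
+; vp8_bilinear_filters_ssse3, whose entries sum to 128, then rounds and
+; shifts. The all-pass row (128, 0) never reaches pmaddubsw because the
+; xoffset==0 and yoffset==0 fast paths skip that pass entirely.
+;
+;   unsigned char bilinear_px(unsigned char a, unsigned char b,
+;                             const signed char *f)
+;   {
+;       return (unsigned char)((a * f[0] + b * f[1] + 64) >> 7);
+;   }
+;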
+;void vp8_bilinear_predict16x16_ssse3
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset,
+; int yoffset,
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+global sym(vp8_bilinear_predict16x16_ssse3)
+sym(vp8_bilinear_predict16x16_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)]
+ movsxd rax, dword ptr arg(2) ; xoffset
+
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je b16x16_sp_only
+
+ shl rax, 4
+ lea rax, [rax + rcx] ; HFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ mov rsi, arg(0) ; src_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm1, [rax]
+
+ movsxd rax, dword ptr arg(3) ; yoffset
+
+ cmp rax, 0 ; skip second_pass filter if yoffset=0
+ je b16x16_fp_only
+
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8]
+ movsxd rdx, dword ptr arg(1) ; src_pixels_per_line
+
+ movdqa xmm2, [rax]
+
+%if ABI_IS_32BIT=0
+ movsxd r8, dword ptr arg(5) ; dst_pitch
+%endif
+ movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+ movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
+
+ lea rsi, [rsi + rdx] ; next line
+
+ pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14
+
+ punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
+ pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
+
+ paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
+ psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+ movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm6, xmm5
+ movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
+ lea rsi, [rsi + rdx] ; next line
+
+ pmaddubsw xmm6, xmm1
+
+ punpcklbw xmm4, xmm5
+ pmaddubsw xmm4, xmm1
+
+ paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
+ psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128
+
+ paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
+ psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128
+
+ packuswb xmm6, xmm4
+ movdqa xmm5, xmm7
+
+ punpcklbw xmm5, xmm6
+ pmaddubsw xmm5, xmm2
+
+ punpckhbw xmm7, xmm6
+ pmaddubsw xmm7, xmm2
+
+ paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value
+ psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128
+
+ paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
+ psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128
+
+ packuswb xmm5, xmm7
+ movdqa xmm7, xmm6
+
+ movdqa [rdi], xmm5 ; store the results in the destination
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(5) ; dst_pitch
+%else
+ add rdi, r8
+%endif
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp done
+
+b16x16_sp_only:
+ movsxd rax, dword ptr arg(3) ; yoffset
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ mov rsi, arg(0) ; src_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm1, [rax] ; VFilter
+
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8]
+ movsxd rax, dword ptr arg(1) ; src_pixels_per_line
+
+ ; get the first horizontal line done
+ movq xmm4, [rsi] ; load row 0
+ movq xmm2, [rsi + 8] ; load row 0
+
+ lea rsi, [rsi + rax] ; next line
+.next_row:
+ movq xmm3, [rsi] ; load row + 1
+ movq xmm5, [rsi + 8] ; load row + 1
+
+ punpcklbw xmm4, xmm3
+ punpcklbw xmm2, xmm5
+
+ pmaddubsw xmm4, xmm1
+ movq xmm7, [rsi + rax] ; load row + 2
+
+ pmaddubsw xmm2, xmm1
+ movq xmm6, [rsi + rax + 8] ; load row + 2
+
+ punpcklbw xmm3, xmm7
+ punpcklbw xmm5, xmm6
+
+ pmaddubsw xmm3, xmm1
+ paddw xmm4, [GLOBAL(rd)]
+
+ pmaddubsw xmm5, xmm1
+ paddw xmm2, [GLOBAL(rd)]
+
+ psraw xmm4, VP8_FILTER_SHIFT
+ psraw xmm2, VP8_FILTER_SHIFT
+
+ packuswb xmm4, xmm2
+ paddw xmm3, [GLOBAL(rd)]
+
+ movdqa [rdi], xmm4 ; store row 0
+ paddw xmm5, [GLOBAL(rd)]
+
+ psraw xmm3, VP8_FILTER_SHIFT
+ psraw xmm5, VP8_FILTER_SHIFT
+
+ packuswb xmm3, xmm5
+ movdqa xmm4, xmm7
+
+ movdqa [rdi + rdx],xmm3 ; store row 1
+ lea rsi, [rsi + 2*rax]
+
+ movdqa xmm2, xmm6
+ lea rdi, [rdi + 2*rdx]
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp done
+
+b16x16_fp_only:
+ lea rcx, [rdi+rdx*8]
+ lea rcx, [rcx+rdx*8]
+ movsxd rax, dword ptr arg(1) ; src_pixels_per_line
+
+.next_row:
+ movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07
+ movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08
+
+ punpcklbw xmm2, xmm4
+ movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15
+
+ pmaddubsw xmm2, xmm1
+ movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16
+
+ lea rsi, [rsi + rax] ; next line
+ punpcklbw xmm3, xmm4
+
+ pmaddubsw xmm3, xmm1
+ movq xmm5, [rsi]
+
+ paddw xmm2, [GLOBAL(rd)]
+ movq xmm7, [rsi+1]
+
+ movq xmm6, [rsi+8]
+ psraw xmm2, VP8_FILTER_SHIFT
+
+ punpcklbw xmm5, xmm7
+ movq xmm7, [rsi+9]
+
+ paddw xmm3, [GLOBAL(rd)]
+ pmaddubsw xmm5, xmm1
+
+ psraw xmm3, VP8_FILTER_SHIFT
+ punpcklbw xmm6, xmm7
+
+ packuswb xmm2, xmm3
+ pmaddubsw xmm6, xmm1
+
+ movdqa [rdi], xmm2 ; store the results in the destination
+ paddw xmm5, [GLOBAL(rd)]
+
+ lea rdi, [rdi + rdx] ; dst_pitch
+ psraw xmm5, VP8_FILTER_SHIFT
+
+ paddw xmm6, [GLOBAL(rd)]
+ psraw xmm6, VP8_FILTER_SHIFT
+
+ packuswb xmm5, xmm6
+ lea rsi, [rsi + rax] ; next line
+
+ movdqa [rdi], xmm5 ; store the results in the destination
+ lea rdi, [rdi + rdx] ; dst_pitch
+
+ cmp rdi, rcx
+
+ jne .next_row
+
+done:
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_bilinear_predict8x8_ssse3
+;(
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; int xoffset,
+; int yoffset,
+; unsigned char *dst_ptr,
+; int dst_pitch
+;)
+global sym(vp8_bilinear_predict8x8_ssse3)
+sym(vp8_bilinear_predict8x8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 144 ; reserve 144 bytes
+
+ lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)]
+
+ mov rsi, arg(0) ;src_ptr
+ movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
+
+    ;Read nine rows of unaligned data and put them on the stack. This gives a
+    ;big performance boost.
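+    ;(nine rows are needed because the 8-row bilinear second pass reads one
+    ; extra source row; 9 x 16 bytes accounts for the 144 bytes reserved above)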
+ movdqu xmm0, [rsi]
+ lea rax, [rdx + rdx*2]
+ movdqu xmm1, [rsi+rdx]
+ movdqu xmm2, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm3, [rsi]
+ movdqu xmm4, [rsi+rdx]
+ movdqu xmm5, [rsi+rdx*2]
+ add rsi, rax
+ movdqu xmm6, [rsi]
+ movdqu xmm7, [rsi+rdx]
+
+ movdqa XMMWORD PTR [rsp], xmm0
+
+ movdqu xmm0, [rsi+rdx*2]
+
+ movdqa XMMWORD PTR [rsp+16], xmm1
+ movdqa XMMWORD PTR [rsp+32], xmm2
+ movdqa XMMWORD PTR [rsp+48], xmm3
+ movdqa XMMWORD PTR [rsp+64], xmm4
+ movdqa XMMWORD PTR [rsp+80], xmm5
+ movdqa XMMWORD PTR [rsp+96], xmm6
+ movdqa XMMWORD PTR [rsp+112], xmm7
+ movdqa XMMWORD PTR [rsp+128], xmm0
+
+ movsxd rax, dword ptr arg(2) ; xoffset
+ cmp rax, 0 ; skip first_pass filter if xoffset=0
+ je b8x8_sp_only
+
+ shl rax, 4
+ add rax, rcx ; HFilter
+
+ mov rdi, arg(4) ; dst_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm0, [rax]
+
+ movsxd rax, dword ptr arg(3) ; yoffset
+ cmp rax, 0 ; skip second_pass filter if yoffset=0
+ je b8x8_fp_only
+
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ lea rcx, [rdi+rdx*8]
+
+ movdqa xmm1, [rax]
+
+ ; get the first horizontal line done
+ movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx
+
+ psrldq xmm5, 1
+ lea rsp, [rsp + 16] ; next line
+
+ punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+ pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14
+
+ paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
+ psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
+
+ movdqa xmm7, xmm3
+ packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+ movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+ lea rsp, [rsp + 16] ; next line
+
+ movdqa xmm5, xmm6
+
+ psrldq xmm5, 1
+
+ punpcklbw xmm6, xmm5
+ pmaddubsw xmm6, xmm0
+
+ paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
+ psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128
+
+ packuswb xmm6, xmm6
+
+ punpcklbw xmm7, xmm6
+ pmaddubsw xmm7, xmm1
+
+ paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
+ psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128
+
+ packuswb xmm7, xmm7
+
+ movq [rdi], xmm7 ; store the results in the destination
+ lea rdi, [rdi + rdx]
+
+ movdqa xmm7, xmm6
+
+ cmp rdi, rcx
+ jne .next_row
+
+ jmp done8x8
+
+b8x8_sp_only:
+ movsxd rax, dword ptr arg(3) ; yoffset
+ shl rax, 4
+ lea rax, [rax + rcx] ; VFilter
+
+ mov rdi, arg(4) ;dst_ptr
+ movsxd rdx, dword ptr arg(5) ; dst_pitch
+
+ movdqa xmm0, [rax] ; VFilter
+
+ movq xmm1, XMMWORD PTR [rsp]
+ movq xmm2, XMMWORD PTR [rsp+16]
+
+ movq xmm3, XMMWORD PTR [rsp+32]
+ punpcklbw xmm1, xmm2
+
+ movq xmm4, XMMWORD PTR [rsp+48]
+ punpcklbw xmm2, xmm3
+
+ movq xmm5, XMMWORD PTR [rsp+64]
+ punpcklbw xmm3, xmm4
+
+ movq xmm6, XMMWORD PTR [rsp+80]
+ punpcklbw xmm4, xmm5
+
+ movq xmm7, XMMWORD PTR [rsp+96]
+ punpcklbw xmm5, xmm6
+
+ pmaddubsw xmm1, xmm0
+ pmaddubsw xmm2, xmm0
+
+ pmaddubsw xmm3, xmm0
+ pmaddubsw xmm4, xmm0
+
+ pmaddubsw xmm5, xmm0
+ punpcklbw xmm6, xmm7
+
+ pmaddubsw xmm6, xmm0
+ paddw xmm1, [GLOBAL(rd)]
+
+ paddw xmm2, [GLOBAL(rd)]
+ psraw xmm1, VP8_FILTER_SHIFT
+
+ paddw xmm3, [GLOBAL(rd)]
+ psraw xmm2, VP8_FILTER_SHIFT
+
+ paddw xmm4, [GLOBAL(rd)]
+ psraw xmm3, VP8_FILTER_SHIFT
+
+ paddw xmm5, [GLOBAL(rd)]
+ psraw xmm4, VP8_FILTER_SHIFT
+
+ paddw xmm6, [GLOBAL(rd)]
+ psraw xmm5, VP8_FILTER_SHIFT
+
+ psraw xmm6, VP8_FILTER_SHIFT
+ packuswb xmm1, xmm1
+
+ packuswb xmm2, xmm2
+ movq [rdi], xmm1
+
+ packuswb xmm3, xmm3
+ movq [rdi+rdx], xmm2
+
+ packuswb xmm4, xmm4
+ movq xmm1, XMMWORD PTR [rsp+112]
+
+ lea rdi, [rdi + 2*rdx]
+ movq xmm2, XMMWORD PTR [rsp+128]
+
+ packuswb xmm5, xmm5
+ movq [rdi], xmm3
+
+ packuswb xmm6, xmm6
+ movq [rdi+rdx], xmm4
+
+ lea rdi, [rdi + 2*rdx]
+ punpcklbw xmm7, xmm1
+
+ movq [rdi], xmm5
+ pmaddubsw xmm7, xmm0
+
+ movq [rdi+rdx], xmm6
+ punpcklbw xmm1, xmm2
+
+ pmaddubsw xmm1, xmm0
+ paddw xmm7, [GLOBAL(rd)]
+
+ psraw xmm7, VP8_FILTER_SHIFT
+ paddw xmm1, [GLOBAL(rd)]
+
+ psraw xmm1, VP8_FILTER_SHIFT
+ packuswb xmm7, xmm7
+
+ packuswb xmm1, xmm1
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm7
+
+ movq [rdi+rdx], xmm1
+ lea rsp, [rsp + 144]
+
+ jmp done8x8
+
+b8x8_fp_only:
+ lea rcx, [rdi+rdx*8]
+
+.next_row:
+ movdqa xmm1, XMMWORD PTR [rsp]
+ movdqa xmm3, XMMWORD PTR [rsp+16]
+
+ movdqa xmm2, xmm1
+ movdqa xmm5, XMMWORD PTR [rsp+32]
+
+ psrldq xmm2, 1
+ movdqa xmm7, XMMWORD PTR [rsp+48]
+
+ movdqa xmm4, xmm3
+ psrldq xmm4, 1
+
+ movdqa xmm6, xmm5
+ psrldq xmm6, 1
+
+ punpcklbw xmm1, xmm2
+ pmaddubsw xmm1, xmm0
+
+ punpcklbw xmm3, xmm4
+ pmaddubsw xmm3, xmm0
+
+ punpcklbw xmm5, xmm6
+ pmaddubsw xmm5, xmm0
+
+ movdqa xmm2, xmm7
+ psrldq xmm2, 1
+
+ punpcklbw xmm7, xmm2
+ pmaddubsw xmm7, xmm0
+
+ paddw xmm1, [GLOBAL(rd)]
+ psraw xmm1, VP8_FILTER_SHIFT
+
+ paddw xmm3, [GLOBAL(rd)]
+ psraw xmm3, VP8_FILTER_SHIFT
+
+ paddw xmm5, [GLOBAL(rd)]
+ psraw xmm5, VP8_FILTER_SHIFT
+
+ paddw xmm7, [GLOBAL(rd)]
+ psraw xmm7, VP8_FILTER_SHIFT
+
+ packuswb xmm1, xmm1
+ packuswb xmm3, xmm3
+
+ packuswb xmm5, xmm5
+ movq [rdi], xmm1
+
+ packuswb xmm7, xmm7
+ movq [rdi+rdx], xmm3
+
+ lea rdi, [rdi + 2*rdx]
+ movq [rdi], xmm5
+
+ lea rsp, [rsp + 4*16]
+ movq [rdi+rdx], xmm7
+
+ lea rdi, [rdi + 2*rdx]
+ cmp rdi, rcx
+
+ jne .next_row
+
+ lea rsp, [rsp + 16]
+
+done8x8:
+ ;add rsp, 144
+ pop rsp
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+ db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+shuf2b:
+ db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3b:
+ db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
+
+align 16
+shuf2bfrom1:
+ db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13
+align 16
+shuf3bfrom1:
+ db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11
+
+align 16
+rd:
+ times 8 dw 0x40
+
+align 16
+k0_k5:
+ times 8 db 0, 0 ;placeholder
+ times 8 db 0, 0
+ times 8 db 2, 1
+ times 8 db 0, 0
+ times 8 db 3, 3
+ times 8 db 0, 0
+ times 8 db 1, 2
+ times 8 db 0, 0
+k1_k3:
+ times 8 db 0, 0 ;placeholder
+ times 8 db -6, 12
+ times 8 db -11, 36
+ times 8 db -9, 50
+ times 8 db -16, 77
+ times 8 db -6, 93
+ times 8 db -8, 108
+ times 8 db -1, 123
+k2_k4:
+ times 8 db 128, 0 ;placeholder
+ times 8 db 123, -1
+ times 8 db 108, -8
+ times 8 db 93, -6
+ times 8 db 77, -16
+ times 8 db 50, -9
+ times 8 db 36, -11
+ times 8 db 12, -6
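+; The tables above split the six subpel taps into the byte pairs (k0,k5),
+; (k1,k3) and (k2,k4), each pair replicated eight times, so a single
+; pmaddubsw applies two taps at once. For example, filter index 1,
+; {0, -6, 123, 12, -1, 0}, appears as the second row of each table:
+; k0_k5 = (0, 0), k1_k3 = (-6, 12), k2_k4 = (123, -1).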
+align 16
+vp8_bilinear_filters_ssse3:
+ times 8 db 128, 0
+ times 8 db 112, 16
+ times 8 db 96, 32
+ times 8 db 80, 48
+ times 8 db 64, 64
+ times 8 db 48, 80
+ times 8 db 32, 96
+ times 8 db 16, 112
+
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index efa7b2e09..75991cc4f 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -85,4 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
#endif
#endif
+#if HAVE_SSSE3
+extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
+extern prototype_subpixel_predict(vp8_bilinear_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_bilinear_predict8x8_ssse3);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_subpix_sixtap16x16
+#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
+
+#undef vp8_subpix_sixtap8x8
+#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
+
+#undef vp8_subpix_sixtap8x4
+#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
+
+#undef vp8_subpix_sixtap4x4
+#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
+
+
+#undef vp8_subpix_bilinear16x16
+#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_ssse3
+
+#undef vp8_subpix_bilinear8x8
+#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_ssse3
+
+#endif
+#endif
+
+
+
#endif
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 68454f709..8dd07c90d 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -67,6 +68,17 @@ extern void vp8_filter_block1d8_v6_sse2
unsigned int output_width,
const short *vp8_filter
);
+extern void vp8_filter_block1d16_v6_sse2
+(
+ unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int dst_ptich,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp8_filter
+);
extern void vp8_unpack_block1d16_h6_sse2
(
unsigned char *src_ptr,
@@ -75,31 +87,32 @@ extern void vp8_unpack_block1d16_h6_sse2
unsigned int output_height,
unsigned int output_width
);
-extern void vp8_unpack_block1d8_h6_sse2
+extern void vp8_filter_block1d8_h6_only_sse2
(
unsigned char *src_ptr,
- unsigned short *output_ptr,
unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ int dst_ptich,
unsigned int output_height,
- unsigned int output_width
+ const short *vp8_filter
);
-extern void vp8_pack_block1d8_v6_sse2
+extern void vp8_filter_block1d16_h6_only_sse2
(
- unsigned short *src_ptr,
- unsigned char *output_ptr,
+ unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
int dst_ptich,
- unsigned int pixels_per_line,
- unsigned int output_height,
- unsigned int output_width
+ unsigned int output_height,
+ const short *vp8_filter
);
-extern void vp8_pack_block1d16_v6_sse2
+extern void vp8_filter_block1d8_v6_only_sse2
(
- unsigned short *src_ptr,
+ unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
unsigned char *output_ptr,
int dst_ptich,
- unsigned int pixels_per_line,
- unsigned int output_height,
- unsigned int output_width
+ unsigned int output_height,
+ const short *vp8_filter
);
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
@@ -115,7 +128,7 @@ void vp8_sixtap_predict4x4_mmx
int dst_pitch
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
@@ -136,7 +149,7 @@ void vp8_sixtap_predict16x16_mmx
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -168,7 +181,7 @@ void vp8_sixtap_predict8x8_mmx
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -194,7 +207,7 @@ void vp8_sixtap_predict8x4_mmx
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -240,34 +253,75 @@ void vp8_sixtap_predict16x16_sse2
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
{
- HFilter = vp8_six_tap_mmx[xoffset];
- vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
+ if (yoffset)
+ {
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
+ VFilter = vp8_six_tap_mmx[yoffset];
+ vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
+ }
+ else
+ {
+ /* First-pass only */
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
+ }
}
else
{
+ /* Second-pass only */
+ VFilter = vp8_six_tap_mmx[yoffset];
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
+ vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
}
+}
+
+
+void vp8_sixtap_predict8x8_sse2
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+)
+{
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
+ const short *HFilter, *VFilter;
- if (yoffset)
+ if (xoffset)
{
- VFilter = vp8_six_tap_mmx[yoffset];
- vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
- vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
+ if (yoffset)
+ {
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
+ VFilter = vp8_six_tap_mmx[yoffset];
+ vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+ }
+ else
+ {
+ /* First-pass only */
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
+ }
}
else
{
- vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16);
+ /* Second-pass only */
+ VFilter = vp8_six_tap_mmx[yoffset];
+ vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
}
}
-void vp8_sixtap_predict8x8_sse2
+void vp8_sixtap_predict8x4_sse2
(
unsigned char *src_ptr,
int src_pixels_per_line,
@@ -277,34 +331,131 @@ void vp8_sixtap_predict8x8_sse2
int dst_pitch
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
{
- HFilter = vp8_six_tap_mmx[xoffset];
- vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
+ if (yoffset)
+ {
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
+ VFilter = vp8_six_tap_mmx[yoffset];
+ vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+ }
+ else
+ {
+ /* First-pass only */
+ HFilter = vp8_six_tap_mmx[xoffset];
+ vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
+ }
}
else
{
- vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 13, 16);
+ /* Second-pass only */
+ VFilter = vp8_six_tap_mmx[yoffset];
+ vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
}
+}
+
+#endif
+
+#if HAVE_SSSE3
+
+extern void vp8_filter_block1d8_h6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_h6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_v6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+extern void vp8_filter_block1d8_v6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_h6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_v6_ssse3
+(
+ unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp8_filter_index
+);
+
+void vp8_sixtap_predict16x16_ssse3
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+
+)
+{
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
- if (yoffset)
+ if (xoffset)
{
- VFilter = vp8_six_tap_mmx[yoffset];
- vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+ if (yoffset)
+ {
+ vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
+ vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
+ }
+ else
+ {
+ /* First-pass only */
+ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
+ }
}
else
{
- vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, dst_pitch);
+ /* Second-pass only */
+ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
}
-
-
}
-
-void vp8_sixtap_predict8x4_sse2
+void vp8_sixtap_predict8x8_ssse3
(
unsigned char *src_ptr,
int src_pixels_per_line,
@@ -314,29 +465,89 @@ void vp8_sixtap_predict8x4_sse2
int dst_pitch
)
{
- DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering
- const short *HFilter, *VFilter;
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
if (xoffset)
{
- HFilter = vp8_six_tap_mmx[xoffset];
- vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
+ if (yoffset)
+ {
+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
+ }
+ else
+ {
+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
+ }
}
else
{
- vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 9, 16);
+ /* Second-pass only */
+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
}
+}
+
+
+void vp8_sixtap_predict8x4_ssse3
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+)
+{
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
- if (yoffset)
+ if (xoffset)
{
- VFilter = vp8_six_tap_mmx[yoffset];
- vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+ if (yoffset)
+ {
+ vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
+ vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
+ }
+ else
+ {
+ /* First-pass only */
+ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+ }
}
else
{
- vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 4, dst_pitch);
+ /* Second-pass only */
+ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
}
+}
+void vp8_sixtap_predict4x4_ssse3
+(
+ unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch
+)
+{
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
+
+ if (xoffset)
+ {
+ if (yoffset)
+ {
+ vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
+ vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
+ }
+ else
+ {
+ vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+ }
+ }
+ else
+ {
+ vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+ }
}
+
#endif
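All of the reworked predictors above share the same three-way dispatch on (xoffset, yoffset): both passes through a temporary buffer when both offsets are non-zero, otherwise a single "_only" pass straight into the destination. A minimal scalar sketch of that structure, assuming the standard VP8 six-tap kernels (function and helper names here are placeholders, not the SIMD entry points declared above):

static const short ktab[8][6] = {
    { 0,   0, 128,   0,   0, 0 }, { 0,  -6, 123,  12,  -1, 0 },
    { 2, -11, 108,  36,  -8, 1 }, { 0,  -9,  93,  50,  -6, 0 },
    { 3, -16,  77,  77, -16, 3 }, { 0,  -6,  50,  93,  -9, 0 },
    { 1,  -8,  36, 108, -11, 2 }, { 0,  -1,  12, 123,  -6, 0 },
};

static unsigned char clamp255(int v)
{
    return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One six-tap pass; src points at the first tap sample, and step is 1 for
 * a horizontal pass or the source stride for a vertical pass. */
static void pass6(const unsigned char *src, int src_stride, int step,
                  unsigned char *dst, int dst_stride,
                  int w, int h, const short *k)
{
    int r, c;

    for (r = 0; r < h; r++)
    {
        for (c = 0; c < w; c++)
        {
            const unsigned char *p = src + c;
            int t = p[0]        * k[0] + p[step]     * k[1] +
                    p[2 * step] * k[2] + p[3 * step] * k[3] +
                    p[4 * step] * k[4] + p[5 * step] * k[5];
            dst[c] = clamp255((t + 64) >> 7);   /* VP8 rounding and shift */
        }
        src += src_stride;
        dst += dst_stride;
    }
}

void sixtap_predict_sketch(const unsigned char *src, int src_stride,
                           int xoffset, int yoffset,
                           unsigned char *dst, int dst_stride, int size)
{
    unsigned char tmp[24 * 24];   /* like FData2: size + 5 filtered rows */

    if (xoffset && yoffset)
    {
        /* Horizontal into tmp over size + 5 rows (the vertical tap
         * footprint), then vertical from tmp into the destination. */
        pass6(src - 2 * src_stride - 2, src_stride, 1,
              tmp, size, size, size + 5, ktab[xoffset]);
        pass6(tmp, size, size, dst, dst_stride, size, size, ktab[yoffset]);
    }
    else if (xoffset)
    {
        /* First-pass only. */
        pass6(src - 2, src_stride, 1, dst, dst_stride,
              size, size, ktab[xoffset]);
    }
    else
    {
        /* Second-pass only. */
        pass6(src - 2 * src_stride, src_stride, src_stride,
              dst, dst_stride, size, size, ktab[yoffset]);
    }
}

Since ktab[0] is { 0, 0, 128, 0, 0, 0 }, an offset of zero reduces a pass to a plain copy, which is why the single-pass variants are safe whenever the other offset is zero.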
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 5312e06da..38500fd01 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -26,6 +27,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
int mmx_enabled = flags & HAS_MMX;
int xmm_enabled = flags & HAS_SSE;
int wmt_enabled = flags & HAS_SSE2;
+ int SSSE3Enabled = flags & HAS_SSSE3;
/* Note:
*
@@ -41,7 +43,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
{
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
- rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx;
+ rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
@@ -72,7 +74,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
#if CONFIG_POSTPROC
rtcd->postproc.down = vp8_mbpost_proc_down_mmx;
- //rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
+ /*rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;*/
rtcd->postproc.downacross = vp8_post_proc_down_and_across_mmx;
rtcd->postproc.addnoise = vp8_plane_add_noise_mmx;
#endif
@@ -113,5 +115,19 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
}
#endif
+
+#if HAVE_SSSE3
+
+ if (SSSE3Enabled)
+ {
+ rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
+ rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
+ rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3;
+ rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
+ rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
+ rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3;
+ }
+#endif
+
#endif
}
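The new SSSE3Enabled flag follows the same pattern as the existing ones; the bit itself comes from CPUID leaf 1, ECX bit 9. A sketch of that detection (the flag value and function name are illustrative; the real helper and HAS_SSSE3 constant live in vpx_ports):

#include <cpuid.h>   /* GCC/Clang helper; illustrative only */

#define HAS_SSSE3_SKETCH 0x10   /* assumed flag value for this sketch */

static int detect_ssse3(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;
    return (ecx & (1u << 9)) ? HAS_SSSE3_SKETCH : 0;  /* SSSE3 = ECX bit 9 */
}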
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
new file mode 100644
index 000000000..e9741e286
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = pbi->common.rtcd.flags;
+ int has_edsp = flags & HAS_EDSP;
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ pbi->dequant.block = vp8_dequantize_b_v6;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
+#if 0 /*For use with RTCD, when implemented*/
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+ }
+#endif
+
+#if HAVE_ARMV7
+ if (has_neon)
+ {
+ pbi->dequant.block = vp8_dequantize_b_neon;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
+ /*This is not used: NEON always dequants two blocks at once.
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
+#if 0 /*For use with RTCD, when implemented*/
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+ }
+#endif
+#endif
+}
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index eb3f0307c..de3648ae2 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index 143e33e46..6515804bb 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
new file mode 100644
index 000000000..6bebda24f
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -0,0 +1,219 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_dc_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+
+;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride, int Dc)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch ; +4 = 40
+; sp + 40 = stride ; +4 = 44
+; sp + 44 = Dc ; +4 = 48
+
+
+|vp8_dequant_dc_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r6, [sp, #44]
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ mov r12, #3
+
+vp8_dequant_dc_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_dc_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_dc_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_dc_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_dc_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_dc_idct_loop2_v6
+
+; vpx_memset
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_dc_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
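For reference, the routine above is the ARMv6 counterpart of the scalar vp8_dequant_dc_idct_add_c: dequantize the sixteen coefficients with the DC term forced to the passed-in value, run the 4x4 inverse transform, add the residual to the prediction, and zero the coefficient block. A hedged C sketch of that behavior, assuming the 4x4 IDCT is available as vp8_short_idct4x4llm_c:

#include <string.h>

extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch);

void dequant_dc_idct_add_sketch(short *input, short *dq, unsigned char *pred,
                                unsigned char *dest, int pitch, int stride,
                                int Dc)
{
    short output[16];
    int i, r, c;

    input[0] = (short)Dc;                 /* DC arrives pre-dequantized */
    for (i = 1; i < 16; i++)
        input[i] = (short)(dq[i] * input[i]);

    vp8_short_idct4x4llm_c(input, output, 8);  /* 8-byte pitch = 4 shorts */

    for (r = 0; r < 4; r++)               /* residual + prediction, clamped */
    {
        for (c = 0; c < 4; c++)
        {
            int a = output[r * 4 + c] + pred[c];
            dest[c] = (unsigned char)(a < 0 ? 0 : (a > 255 ? 255 : a));
        }
        dest += stride;
        pred += pitch;
    }

    memset(input, 0, 32);                 /* 16 shorts, as the asm does */
}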
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
new file mode 100644
index 000000000..47b671ca6
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -0,0 +1,197 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+ EXPORT |vp8_dequant_idct_add_v6|
+
+ AREA |.text|, CODE, READONLY
+;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch ; +4 = 40
+; sp + 40 = stride ; +4 = 44
+
+
+|vp8_dequant_idct_add_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4
+ str r3, [sp]
+
+ mov r12, #4
+
+vp8_dequant_add_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne vp8_dequant_add_loop
+
+ sub r0, r0, #32
+ mov r1, r0
+
+; short_idct4x4llm_v6_dual
+ ldr r3, cospi8sqrt2minus1
+ ldr r4, sinpi8sqrt2
+ ldr r6, [r0, #8]
+ mov r5, #2
+vp8_dequant_idct_loop1_v6
+ ldr r12, [r0, #24]
+ ldr r14, [r0, #16]
+ smulwt r9, r3, r6
+ smulwb r7, r3, r6
+ smulwt r10, r4, r6
+ smulwb r8, r4, r6
+ pkhbt r7, r7, r9, lsl #16
+ smulwt r11, r3, r12
+ pkhbt r8, r8, r10, lsl #16
+ uadd16 r6, r6, r7
+ smulwt r7, r4, r12
+ smulwb r9, r3, r12
+ smulwb r10, r4, r12
+ subs r5, r5, #1
+ pkhbt r9, r9, r11, lsl #16
+ ldr r11, [r0], #4
+ pkhbt r10, r10, r7, lsl #16
+ uadd16 r7, r12, r9
+ usub16 r7, r8, r7
+ uadd16 r6, r6, r10
+ uadd16 r10, r11, r14
+ usub16 r8, r11, r14
+ uadd16 r9, r10, r6
+ usub16 r10, r10, r6
+ uadd16 r6, r8, r7
+ usub16 r7, r8, r7
+ str r6, [r1, #8]
+ ldrne r6, [r0, #8]
+ str r7, [r1, #16]
+ str r10, [r1, #24]
+ str r9, [r1], #4
+ bne vp8_dequant_idct_loop1_v6
+
+ mov r5, #2
+ sub r0, r1, #8
+vp8_dequant_idct_loop2_v6
+ ldr r6, [r0], #4
+ ldr r7, [r0], #4
+ ldr r8, [r0], #4
+ ldr r9, [r0], #4
+ smulwt r1, r3, r6
+ smulwt r12, r4, r6
+ smulwt lr, r3, r8
+ smulwt r10, r4, r8
+ pkhbt r11, r8, r6, lsl #16
+ pkhbt r1, lr, r1, lsl #16
+ pkhbt r12, r10, r12, lsl #16
+ pkhtb r6, r6, r8, asr #16
+ uadd16 r6, r1, r6
+ pkhbt lr, r9, r7, lsl #16
+ uadd16 r10, r11, lr
+ usub16 lr, r11, lr
+ pkhtb r8, r7, r9, asr #16
+ subs r5, r5, #1
+ smulwt r1, r3, r8
+ smulwb r7, r3, r8
+ smulwt r11, r4, r8
+ smulwb r9, r4, r8
+ pkhbt r1, r7, r1, lsl #16
+ uadd16 r8, r1, r8
+ pkhbt r11, r9, r11, lsl #16
+ usub16 r1, r12, r8
+ uadd16 r8, r11, r6
+ ldr r9, c0x00040004
+ ldr r12, [sp, #40]
+ uadd16 r6, r10, r8
+ usub16 r7, r10, r8
+ uadd16 r7, r7, r9
+ uadd16 r6, r6, r9
+ uadd16 r10, r14, r1
+ usub16 r1, r14, r1
+ uadd16 r10, r10, r9
+ uadd16 r1, r1, r9
+ ldr r11, [r2], r12
+ mov r8, r7, asr #3
+ pkhtb r9, r8, r10, asr #19
+ mov r8, r1, asr #3
+ pkhtb r8, r8, r6, asr #19
+ uxtb16 lr, r11, ror #8
+ qadd16 r9, r9, lr
+ uxtb16 lr, r11
+ qadd16 r8, r8, lr
+ usat16 r9, #8, r9
+ usat16 r8, #8, r8
+ orr r9, r8, r9, lsl #8
+ ldr r11, [r2], r12
+ ldr lr, [sp]
+ ldr r12, [sp, #44]
+ mov r7, r7, lsl #16
+ mov r1, r1, lsl #16
+ mov r10, r10, lsl #16
+ mov r6, r6, lsl #16
+ mov r7, r7, asr #3
+ pkhtb r7, r7, r10, asr #19
+ mov r1, r1, asr #3
+ pkhtb r1, r1, r6, asr #19
+ uxtb16 r8, r11, ror #8
+ qadd16 r7, r7, r8
+ uxtb16 r8, r11
+ qadd16 r1, r1, r8
+ usat16 r7, #8, r7
+ usat16 r1, #8, r1
+ orr r1, r1, r7, lsl #8
+ str r9, [lr], r12
+ str r1, [lr], r12
+ str lr, [sp]
+ bne vp8_dequant_idct_loop2_v6
+
+; vpx_memset
+ sub r0, r0, #32
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc}
+ ENDP ; |vp8_dequant_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2 DCD 0x00008A8C
+c0x00040004 DCD 0x00040004
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
deleted file mode 100644
index 3daa9b34f..000000000
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ /dev/null
@@ -1,202 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_dc_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
-|vp8_dequant_dc_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r6, [sp, #36] ;load Dc
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- mov r12, #3
-
-dequant_dc_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_dc_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_11
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_11 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_22
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_22 ;
-
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_dc_idct_v68|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
deleted file mode 100644
index 61bb48d04..000000000
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_v6|
- ; ARM
- ; REQUIRE8
- ; PRESERVE8
-
- AREA |.text|, CODE, READONLY ; name this block of code
-;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
-|vp8_dequant_idct_v6| PROC
- stmdb sp!, {r4-r11, lr}
-
- ldr r4, [r0] ;input
- ldr r5, [r1], #4 ;dq
-
- sub sp, sp, #4
- str r0, [sp]
-
- mov r12, #4
-
-dequant_idct_loop
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- ldr r4, [r0, #4] ;input
- ldr r5, [r1], #4 ;dq
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- smulbb r6, r4, r5
- smultt r7, r4, r5
-
- subs r12, r12, #1
-
- ldrne r4, [r0, #4]
- ldrne r5, [r1], #4
-
- strh r6, [r0], #2
- strh r7, [r0], #2
-
- bne dequant_idct_loop
-
- sub r0, r0, #32
- mov r1, r2
- mov r2, r3
-
-; short_idct4x4llm_v6_dual
-
- mov r3, #0x00004E00 ; cos
- orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
- mov r4, #0x00008A00 ; sin
- orr r4, r4, #0x0000008C ; sinpi8sqrt2
- mov r5, #0x2 ; i=2 i
-loop1_dual_1
- ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
- ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
- ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
-
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
- pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
- smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
- pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
- uadd16 r6, r6, r7 ; 5c+5 | 4c+4
- smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
- smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
- smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
- subs r5, r5, #0x1 ; i-- --
- pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
- ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
- pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
- uadd16 r7, r12, r9 ; 13c+13 | 12c+12
- usub16 r7, r8, r7 ; c c
- uadd16 r6, r6, r10 ; d d
- uadd16 r10, r11, r14 ; a a
- usub16 r8, r11, r14 ; b b
- uadd16 r9, r10, r6 ; a+d a+d
- usub16 r10, r10, r6 ; a-d a-d
- uadd16 r6, r8, r7 ; b+c b+c
- usub16 r7, r8, r7 ; b-c b-c
- str r6, [r1, r2] ; o5 | o4
- add r6, r2, r2 ; pitch * 2 p2
- str r7, [r1, r6] ; o9 | o8
- add r6, r6, r2 ; pitch * 3 p3
- str r10, [r1, r6] ; o13 | o12
- str r9, [r1], #0x4 ; o1 | o0 ++
- bne loop1_dual_1 ;
- mov r5, #0x2 ; i=2 i
- sub r0, r1, #8 ; reset input/output i/o
-loop2_dual_2
- ldr r6, [r0, r2] ; i5 | i4 5|4
- ldr r1, [r0] ; i1 | i0 1|0
- ldr r12, [r0, #0x4] ; i3 | i2 3|2
- add r14, r2, #0x4 ; pitch + 2 p+2
- ldr r14, [r0, r14] ; i7 | i6 7|6
- smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
- smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
- smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
- smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
- pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
- pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
- pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
- pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
- uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
- pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
- uadd16 r10, r11, r9 ; a a
- usub16 r9, r11, r9 ; b b
- pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
- subs r5, r5, #0x1 ; i-- --
- smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
- smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
- smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
- smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
-
- pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
- pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
- uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
- usub16 r12, r8, r6 ; c (o1 | o5) c
- uadd16 r6, r11, r1 ; d (o3 | o7) d
- uadd16 r7, r10, r6 ; a+d a+d
- mov r8, #0x4 ; set up 4's 4
- orr r8, r8, #0x40000 ; 4|4
- usub16 r6, r10, r6 ; a-d a-d
- uadd16 r6, r6, r8 ; a-d+4 3|7
- uadd16 r7, r7, r8 ; a+d+4 0|4
- uadd16 r10, r9, r12 ; b+c b+c
- usub16 r1, r9, r12 ; b-c b-c
- uadd16 r10, r10, r8 ; b+c+4 1|5
- uadd16 r1, r1, r8 ; b-c+4 2|6
- mov r8, r10, asr #19 ; o1 >> 3
- strh r8, [r0, #2] ; o1
- mov r8, r1, asr #19 ; o2 >> 3
- strh r8, [r0, #4] ; o2
- mov r8, r6, asr #19 ; o3 >> 3
- strh r8, [r0, #6] ; o3
- mov r8, r7, asr #19 ; o0 >> 3
- strh r8, [r0], r2 ; o0 +p
- sxth r10, r10 ;
- mov r8, r10, asr #3 ; o5 >> 3
- strh r8, [r0, #2] ; o5
- sxth r1, r1 ;
- mov r8, r1, asr #3 ; o6 >> 3
- strh r8, [r0, #4] ; o6
- sxth r6, r6 ;
- mov r8, r6, asr #3 ; o7 >> 3
- strh r8, [r0, #6] ; o7
- sxth r7, r7 ;
- mov r8, r7, asr #3 ; o4 >> 3
- strh r8, [r0], r2 ; o4 +p
-;;;;; subs r5, r5, #0x1 ; i-- --
- bne loop2_dual_2 ;
- ;
-
-;vpx_memset
- ldr r0, [sp]
- add sp, sp, #4
-
- mov r12, #0
- str r12, [r0]
- str r12, [r0, #4]
- str r12, [r0, #8]
- str r12, [r0, #12]
- str r12, [r0, #16]
- str r12, [r0, #20]
- str r12, [r0, #24]
- str r12, [r0, #28]
-
- ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
-
- ENDP ;|vp8_dequant_idct_v6|
-
- END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index 95e38594f..72f7e0ee5 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
new file mode 100644
index 000000000..3c7bc502f
--- /dev/null
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1)
+ vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ if (eobs[2] > 1)
+ vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0;
+ }
+
+ if (eobs[3] > 1)
+ vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0;
+ }
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_v6
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+ eobs += 2;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+ eobs += 2;
+ }
+}
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index 495004f9c..985951c7c 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -11,14 +11,11 @@
* to be useless. However, it's been left (for now)
* for reference.
*/
-/*
+#if 0
#if HAVE_ARMV6
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_v6
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
@@ -27,15 +24,12 @@
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_v6
-#endif // HAVE_ARMV6
+#endif /* HAVE_ARMV6 */
#if HAVE_ARMV7
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_neon
-
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
@@ -44,6 +38,6 @@
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_neon
-#endif // HAVE_ARMV7
-*/
-#endif // DBOOLHUFF_ARM_H
+#endif /* HAVE_ARMV7 */
+#endif
+#endif /* DBOOLHUFF_ARM_H */
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 54006a921..b3e14b793 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -29,7 +30,7 @@ void vp8_dequantize_b_neon(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
vp8_dequantize_b_loop_neon(Q, DQC, DQ);
}
@@ -41,7 +42,7 @@ void vp8_dequantize_b_v6(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index c8a61a4a7..b7d800d26 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,32 +14,60 @@
#if HAVE_ARMV6
extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct(vp8_dequant_idct_v6);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_v6
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_v6
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
+#endif
#endif
#if HAVE_ARMV7
extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct(vp8_dequant_idct_neon);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_neon
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_neon
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
+#endif
#endif
#endif
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644
index 000000000..45e068a9f
--- /dev/null
+++ b/vp8/decoder/arm/detokenize.asm
@@ -0,0 +1,320 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_mb_tokens_v6|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff EQU 0
+l_i EQU 4
+l_type EQU 8
+l_stop EQU 12
+l_c EQU 16
+l_l_ptr EQU 20
+l_a_ptr EQU 24
+l_bc EQU 28
+l_coef_ptr EQU 32
+l_stacksize EQU 64
+
+
+;; constant offsets -- these should be created at build time
+c_block2above_offset EQU 25
+c_entropy_nodes EQU 11
+c_dct_eob_token EQU 11
+
+|vp8_decode_mb_tokens_v6| PROC
+ stmdb sp!, {r4 - r11, lr}
+ sub sp, sp, #l_stacksize
+ mov r7, r1 ; type
+ mov r9, r0 ; detoken
+
+ ldr r1, [r9, #detok_current_bc]
+ ldr r0, [r9, #detok_qcoeff_start_ptr]
+ mov r11, #0 ; i
+ mov r3, #16 ; stop
+
+ cmp r7, #1 ; type ?= 1
+ addeq r11, r11, #24 ; i = 24
+ addeq r3, r3, #8 ; stop = 24
+ addeq r0, r0, #3, 24 ; qcoefptr += 24*16
+
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ str r1, [sp, #l_bc]
+
+ add lr, r9, r7, lsl #2 ; detoken + type*4
+
+ ldr r8, [r1, #bool_decoder_user_buffer]
+
+ ldr r10, [lr, #detok_coef_probs]
+ ldr r5, [r1, #bool_decoder_count]
+ ldr r6, [r1, #bool_decoder_range]
+ ldr r4, [r1, #bool_decoder_value]
+
+ str r10, [sp, #l_coef_ptr]
+
+BLOCK_LOOP
+ ldr r3, [r9, #detok_ptr_block2leftabove]
+ ldr r1, [r9, #detok_L]
+ ldr r2, [r9, #detok_A]
+ ldrb r12, [r3, r11]! ; block2left[i]
+ ldrb r3, [r3, #c_block2above_offset]; block2above[i]
+
+ cmp r7, #0 ; c = !type
+ moveq r7, #1
+ movne r7, #0
+
+ ldrb r0, [r1, r12]! ; *(L += block2left[i])
+ ldrb r3, [r2, r3]! ; *(A += block2above[i])
+ mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
+
+; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
+ cmp r0, #0 ; *l ?= 0
+ movne r0, #1
+ cmp r3, #0 ; *a ?= 0
+ addne r0, r0, #1 ; t
+
+ str r1, [sp, #l_l_ptr] ; save &l
+ str r2, [sp, #l_a_ptr] ; save &a
+ smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
+ mov r1, #0 ; t = 0
+ str r7, [sp, #l_c]
+
+ ;align 4
+COEFF_LOOP
+ ldr r3, [r9, #detok_ptr_coef_bands_x]
+ ldr lr, [r9, #detok_coef_tree_ptr]
+ ;STALL
+ ldrb r3, [r3, r7] ; coef_bands_x[c]
+ ;STALL
+ ;STALL
+ add r0, r0, r3 ; Prob += coef_bands_x[c]
+
+get_token_loop
+ ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; (range << 8) - (1 << 8)
+ mov r10, #1 ; 1
+
+ smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
+ ;++
+
+ subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
+ addhs r1, r1, #1 ; t += 1
+ movhs r4, r3 ; value -= bigsplit (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ ; movlo r6, r2 ; range = split
+
+ ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+ clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
+ sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; 24 - count
+ addle r8, r8, #1 ; bufptr++
+ orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
+
+ cmp r1, #0 ; t ?= 0
+ bgt get_token_loop ; while (t > 0)
+
+ cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
+ beq END_OF_BLOCK ; break
+
+ rsb lr, r1, #0 ; v = -t;
+
+ cmp lr, #4 ; if(v > FOUR_TOKEN)
+ ble SKIP_EXTRABITS
+
+ ldr r3, [r9, #detok_teb_base_ptr]
+ mov r11, #1 ; constant 1: used in split = 1 + ... and in v += 1 << bits_count
+ add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
+
+ ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+ ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+ add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
+
+ ldrb r2, [r3, #4] ; probability (Probs[] follows the min_val and Length halfwords, hence +4)
+ mov r3, r6, lsl #8 ; range << 8
+ sub r3, r3, #256 ; (range << 8) - (1 << 8)
+
+ smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
+
+ ldrb r12, [r8] ; *bufptr
+ ;++
+
+ subs r10, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r10 ; value = value - (split << 24)
+ subhs r2, r6, r2 ; range = range - split
+ addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+ clz r3, r2 ; shift - leading zeros in split
+ sub r3, r3, #24 ; don't count first 3 bytes
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range = range << shift
+ mov r4, r4, lsl r3 ; value <<= shift
+
+ addle r5, r5, #8 ; count += BR_COUNT
+ addle r8, r8, #1 ; bufptr++
+ rsble r3, r5, #24 ; BR_COUNT - count
+ orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
+
+ subs r0, r0, #1 ; bits_count --
+ bpl extrabits_loop
+
+
+SKIP_EXTRABITS
+ ldr r11, [sp, #l_qcoeff]
+ ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
+
+ cmp r1, #0 ; check for nonzero token - if (t)
+ beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table check
+
+ add r3, r6, #1 ; range + 1
+ mov r2, r3, lsr #1 ; split = (range + 1) >> 1
+
+ subs r3, r4, r2, lsl #24 ; value - (split<<24)
+ movhs r4, r3 ; value -= (split << 24)
+ subhs r2, r6, r2 ; range -= split
+ mvnhs r3, lr ; -v
+ addhs lr, r3, #1 ; v = (v ^ -1) + 1
+
+; NORMALIZE
+ clz r3, r2 ; leading 0s in split
+ sub r3, r3, #24 ; shift
+ subs r5, r5, r3 ; count -= shift
+ mov r6, r2, lsl r3 ; range <<= shift
+ mov r4, r4, lsl r3 ; value <<= shift
+ ldrleb r2, [r8], #1 ; *(bufptr++)
+ addle r5, r5, #8 ; count += 8
+ rsble r3, r5, #24 ; BR_COUNT - count
+ orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
+
+ add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ cmn r1, #1 ; t < -ONE_TOKEN
+
+ addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
+
+ mvn r1, #1 ; t = ~1 = -2, matching the C code
+
+SKIP_EOB_CHECK
+ ldr r7, [sp, #l_c] ; c
+ ldr r3, [r9, #detok_scan]
+ add r1, r1, #2 ; t+= 2
+ cmp r7, #15 ; c will be one higher after the increment below
+
+ ldr r3, [r3, +r7, lsl #2] ; scan[c]; uses the pre-increment value of c
+ add r7, r7, #1 ; c++
+ add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
+
+ str r7, [sp, #l_c] ; store c
+ strh lr, [r3] ; qcoef_ptr[scan[c]] = v
+
+ blt COEFF_LOOP
+
+ sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
+
+END_OF_BLOCK
+ ldr r3, [sp, #l_type] ; type
+ ldr r10, [sp, #l_coef_ptr] ; coef_ptr
+ ldr r0, [sp, #l_qcoeff] ; qcoeff
+ ldr r11, [sp, #l_i] ; i
+ ldr r12, [sp, #l_stop] ; stop
+
+ cmp r3, #0 ; type ?= 0
+ moveq r1, #1
+ movne r1, #0
+ add r3, r11, r9 ; detok + i
+
+ cmp r7, r1 ; c ?= !type
+ strb r7, [r3, #detok_eob] ; eob[i] = c
+
+ ldr r7, [sp, #l_l_ptr] ; l
+ ldr r2, [sp, #l_a_ptr] ; a
+ movne r3, #1 ; t
+ moveq r3, #0
+
+ add r0, r0, #32 ; qcoeff += 32 bytes (16 shorts)
+ add r11, r11, #1 ; i++
+ strb r3, [r7] ; *l = t
+ strb r3, [r2] ; *a = t
+ str r0, [sp, #l_qcoeff] ; qcoeff
+ str r11, [sp, #l_i] ; i
+
+ cmp r11, r12 ; i < stop
+ ldr r7, [sp, #l_type] ; type
+
+ blt BLOCK_LOOP
+
+ cmp r11, #25 ; i ?= 25
+ bne ln2_decode_mb_to
+
+ ldr r12, [r9, #detok_qcoeff_start_ptr]
+ ldr r10, [r9, #detok_coef_probs]
+ mov r7, #0 ; type/i = 0
+ mov r3, #16 ; stop = 16
+ str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
+ str r7, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
+
+ b BLOCK_LOOP
+
+ln2_decode_mb_to
+ cmp r11, #16 ; i ?= 16
+ bne ln1_decode_mb_to
+
+ mov r10, #detok_coef_probs
+ add r10, r10, #2*4 ; coef_probs[type]
+ ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
+
+ mov r7, #2 ; type = 2
+ mov r3, #24 ; stop = 24
+
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
+ b BLOCK_LOOP
+
+ln1_decode_mb_to
+ ldr r2, [sp, #l_bc]
+ mov r0, #0
+ nop
+
+ str r8, [r2, #bool_decoder_user_buffer]
+ str r5, [r2, #bool_decoder_count]
+ str r4, [r2, #bool_decoder_value]
+ str r6, [r2, #bool_decoder_range]
+
+ add sp, sp, #l_stacksize
+ ldmia sp!, {r4 - r11, pc}
+
+ ENDP ; |vp8_decode_mb_tokens_v6|
+
+ END
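
For readers following the new ARMv6 detokenizer above: the bit-decode step it unrolls can be sketched in plain C roughly as follows. This is a minimal illustration mirroring the split computation in the smlawb sequences and the NORMALIZE macro of the C reference this commit deletes further down; the type and function names (bd_t, decode_bit) are invented for the example, not part of the tree.

#include <stdint.h>

typedef struct
{
    const uint8_t *bufptr;  /* next compressed byte */
    uint32_t value;         /* coding window; active bits in the top byte */
    uint32_t range;         /* kept normalized to [128, 255] */
    int count;              /* bits still valid below the active byte */
} bd_t;

/* Decode one bool with the given probability (out of 256). */
static int decode_bit(bd_t *bd, uint8_t probability)
{
    uint32_t split = 1 + (((bd->range - 1) * probability) >> 8);
    int bit = 0;

    if ((bd->value >> 24) >= split)   /* the asm tests value - (split << 24) */
    {
        bd->value -= split << 24;
        bd->range -= split;
        bit = 1;
    }
    else
        bd->range = split;

    /* NORMALIZE: the asm derives the shift with clz; the deleted C
     * reference used a 256-entry norm[] lookup table instead */
    while (bd->range < 128)
    {
        bd->range <<= 1;
        bd->value <<= 1;

        if (--bd->count <= 0)
        {
            bd->count += 8;                               /* BR_COUNT */
            bd->value |= *bd->bufptr++ << (8 - bd->count);
        }
    }

    return bit;
}
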
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644
index 000000000..9bb19b6cf
--- /dev/null
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+#if HAVE_ARMV6
+#if CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif
+#endif
+
+#endif
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
deleted file mode 100644
index c714452a6..000000000
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "type_aliases.h"
-#include "blockd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#define BR_COUNT 8
-#define BOOL_DATA UINT8
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
-
-#define EOB_CONTEXT_NODE 0
-#define ZERO_CONTEXT_NODE 1
-#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
-
-
-
-
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
- { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
- { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
- { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
- { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
- { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
- { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
- { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
- { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
- { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
- { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- for (i = 0; i < 24; i++)
- {
-
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
- *a = *l = 0;
- }
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- a = A[Y2CONTEXT] + vp8_block2above[24];
- l = L[Y2CONTEXT] + vp8_block2left[24];
- *a = *l = 0;
- }
-
-
-}
-
-#define ONYXBLOCK2CONTEXT_OFFSET 0
-#define ONYXBLOCK2LEFT_OFFSET 25
-#define ONYXBLOCK2ABOVE_OFFSET 50
-
-DECLARE_ALIGNED(16, const static unsigned char, norm[128]) =
-{
- 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-void init_detokenizer(VP8D_COMP *dx)
-{
- const VP8_COMMON *const oc = & dx->common;
- MACROBLOCKD *x = & dx->mb;
-
- dx->detoken.norm_ptr = (unsigned char *)norm;
- dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
- dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
- dx->detoken.scan = (int *)vp8_default_zig_zag1d;
- dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-
- dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]);
-
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-
-//shift = norm[range]; \
-// shift = norm_ptr[range]; \
-
-#define NORMALIZE \
- /*if(range < 0x80)*/ \
- { \
- shift = detoken->norm_ptr[range]; \
- range <<= shift; \
- value <<= shift; \
- count -= shift; \
- if(count <= 0) \
- { \
- count += BR_COUNT ; \
- value |= (*bufptr) << (BR_COUNT-count); \
- bufptr++; \
- } \
- }
-#if 1
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
- split = (range + 1) >> 1; \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- v= value_to_sign; \
- } \
- else \
- { \
- range = range-split; \
- value = value-(split<<24); \
- v = -value_to_sign; \
- } \
- range +=range; \
- value +=value; \
- if (!--count) \
- { \
- count = BR_COUNT; \
- value |= *bufptr; \
- bufptr++; \
- }
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) )>> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) ) >> 8); \
- if ( (value >> 24) < split ) \
- { \
- range = split; \
- NORMALIZE \
- Prob = coef_probs; \
- ++c; \
- Prob += vp8_coef_bands_x[c]; \
- goto branch; \
- } \
- value -= (split<<24); \
- range = range - split; \
- NORMALIZE \
- }
-
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
- DECODE_AND_APPLYSIGN(val) \
- Prob = coef_probs + (ENTROPY_NODES*2); \
- if(c < 15){\
- qcoeff_ptr [ scan[c] ] = (INT16) v; \
- ++c; \
- goto DO_WHILE; }\
- qcoeff_ptr [ scan[15] ] = (INT16) v; \
- goto BLOCK_FINISHED;
-
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
- split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
- if(value >= (split<<24))\
- {\
- range = range-split;\
- value = value-(split<<24);\
- val += ((UINT16)1<<bits_count);\
- }\
- else\
- {\
- range = split;\
- }\
- NORMALIZE
-#endif
-
-#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
- const VP8_COMMON *const oc = & dx->common;
-
- BOOL_DECODER *bc = x->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- int eobtotal = 0;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- const int *scan;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
- int type;
- int stop;
- INT16 val, bits_count;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
- //int *scan;
- type = 3;
- i = 0;
- stop = 16;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- i = 24;
- stop = 24;
- type = 1;
- qcoeff_ptr = &x->qcoeff[24*16];
- scan = vp8_default_zig_zag1d;
- eobtotal -= 16;
- }
- else
- {
- scan = vp8_default_zig_zag1d;
- qcoeff_ptr = &x->qcoeff[0];
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
- c = (INT16)(!type);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
-
-DO_WHILE:
- Prob += vp8_coef_bands_x[c];
- DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-
-CHECK_0_:
- DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
- bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
- do
- {
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
- bits_count -- ;
- }
- while (bits_count >= 0);
-
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
-
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
- val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
- DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
- DECODE_AND_APPLYSIGN(1);
- Prob = coef_probs + ENTROPY_NODES;
-
- if (c < 15)
- {
- qcoeff_ptr [ scan[c] ] = (INT16) v;
- ++c;
- goto DO_WHILE;
- }
-
- qcoeff_ptr [ scan[15] ] = (INT16) v;
-BLOCK_FINISHED:
- t = ((x->Block[i].eob = c) != !type); // any nonzero data?
- eobtotal += x->Block[i].eob;
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- scan = vp8_default_zig_zag1d;//x->scan_order1d;
- type = 0;
- i = 0;
- stop = 16;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = &x->qcoeff[0];
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
- coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return eobtotal;
-
-}
-//#endif
-#else
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-#if 0
-//uses relative offsets
-
-const vp8_tree_index vp8_coef_tree_x[ 22] = /* corresponding _CONTEXT_NODEs */
-{
- -DCT_EOB_TOKEN, 1, /* 0 = EOB */
- -ZERO_TOKEN, 1, /* 1 = ZERO */
- -ONE_TOKEN, 1, /* 2 = ONE */
- 2, 5, /* 3 = LOW_VAL */
- -TWO_TOKEN, 1, /* 4 = TWO */
- -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
- 2, 3, /* 6 = HIGH_LOW */
- -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
- 2, 3, /* 8 = CAT_THREEFOUR */
- -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
- -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
-};
-#endif
-
-#define _SCALEDOWN 8 //16 //8
-
-int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
-
-int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
-{
- BOOL_DECODER *bc = detoken->current_bc;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- register int count;
-
- BOOL_DATA *bufptr;
- register unsigned int range;
- register unsigned int value;
- register unsigned int shift;
- UINT32 split;
- INT16 *qcoeff_ptr;
-
- UINT8 *coef_probs;
-// int type;
- int stop;
- INT16 c;
- INT16 t;
- INT16 v;
- vp8_prob *Prob;
-
-
-
-// type = 3;
- i = 0;
- stop = 16;
- qcoeff_ptr = detoken->qcoeff_start_ptr;
-
-// if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
- if (type == 1)
- {
- i += 24;
- stop += 8; //24;
-// type = 1;
- qcoeff_ptr += 24 * 16;
-// eobtotal-=16;
- }
-
- count = bc->count;
- range = bc->range;
- value = bc->value;
- bufptr = &bc->buffer[bc->pos];
-
-
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
- a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
- l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
- c = !type;
- a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
- l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
-
- //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
- //Dest = ((A)!=0) + ((B)!=0);
-
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-
- Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
- t = 0;
-
- do
- {
-
- {
-// onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
-
- Prob += detoken->ptr_onyx_coef_bands_x[c];
-
- GET_TOKEN_START:
-
- do
- {
- split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
-
- if (value >> 24 >= split)
- {
- range = range - split;
- value = value - (split << 24);
- t += 1;
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- t = detoken->vp8_coef_tree_ptr[ t ];
-
- NORMALIZE
-
- }
- while (t > 0) ;
- }
- GET_TOKEN_STOP:
-
- if (t == -DCT_EOB_TOKEN)
- {
- break;
- }
-
- v = -t;
-
- if (v > FOUR_TOKEN)
- {
- INT16 bits_count;
- TOKENEXTRABITS *teb_ptr;
-
-// teb_ptr = &onyxd_token_extra_bits2[t];
-// teb_ptr = &onyxd_token_extra_bits2[v];
- teb_ptr = &detoken->teb_base_ptr[v];
-
-
- v = teb_ptr->min_val;
- bits_count = teb_ptr->Length;
-
- do
- {
- split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v += ((UINT16)1 << bits_count);
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
-
- bits_count -- ;
- }
- while (bits_count >= 0);
- }
-
- Prob = coef_probs;
-
- if (t)
- {
- split = 1 + (((range - 1) * vp8_prob_half) >> 8);
-
- if ((value >> 24) >= split)
- {
- range = range - split;
- value = value - (split << 24);
- v = (v ^ -1) + 1; /* negate w/out conditionals */
-
- //used to eliminate else branch
- split = range;
- }
-
- range = split;
-
- NORMALIZE
- Prob += ENTROPY_NODES;
-
- if (t < -ONE_TOKEN)
- Prob += ENTROPY_NODES;
-
- t = -2;
- }
-
- //if t is zero, we will skip the eob table check
- t += 2;
- qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
-
- }
- while (++c < 16);
-
- if (t != -DCT_EOB_TOKEN)
- {
- --c;
- }
-
- t = ((detoken->eob[i] = c) != !type); // any nonzero data?
-// eobtotal += detoken->eob[i];
- *a = *l = t;
- qcoeff_ptr += 16;
-
- i++;
-
- if (i < stop)
- goto BLOCK_LOOP;
-
- if (i == 25)
- {
- type = 0;
- i = 0;
- stop = 16;
-// coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- qcoeff_ptr = detoken->qcoeff_start_ptr;
- goto BLOCK_LOOP;
- }
-
- if (i == 16)
- {
- type = 2;
-// coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
- stop = 24;
- goto BLOCK_LOOP;
- }
-
- bc->count = count;
- bc->value = value;
- bc->range = range;
- bc->pos = bufptr - bc->buffer;
- return 0;
-}
-//#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-// const ONYX_COMMON * const oc = & dx->common;
- int eobtotal = 0;
- int i, type;
- /*
- dx->detoken.norm_ptr = norm;
- dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree;
- dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE;
- dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x;
- dx->detoken.scan = default_zig_zag1d;
- dx->detoken.teb_base_ptr = onyxd_token_extra_bits2;
-
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]);
- */
-
- dx->detoken.current_bc = x->current_bc;
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- type = 3;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- {
- type = 1;
- eobtotal -= 16;
- }
-
- vp8_decode_mb_tokens_v5(&dx->detoken, type);
-
- for (i = 0; i < 25; i++)
- {
- x->Block[i].eob = dx->detoken.eob[i];
- eobtotal += dx->detoken.eob[i];
- }
-
- return eobtotal;
-}
-#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
deleted file mode 100644
index 4d87ee5bd..000000000
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ /dev/null
@@ -1,364 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_mb_tokens_v5|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
- INCLUDE vpx_asm_offsets.asm
-
-l_qcoeff EQU 0
-l_i EQU 4
-l_type EQU 8
-l_stop EQU 12
-l_c EQU 16
-l_l_ptr EQU 20
-l_a_ptr EQU 24
-l_bc EQU 28
-l_coef_ptr EQU 32
-l_stacksize EQU 64
-
-
-;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset EQU 25
-c_onyxblock2above_offset EQU 50
-c_entropy_nodes EQU 11
-c_dct_eob_token EQU 11
-
-|vp8_decode_mb_tokens_v5| PROC
- stmdb sp!, {r4 - r11, lr}
- sub sp, sp, #l_stacksize
- mov r7, r1
- mov r9, r0 ;DETOK *detoken
-
- ldr r1, [r9, #detok_current_bc]
- ldr r0, [r9, #detok_qcoeff_start_ptr]
- mov r11, #0
- mov r3, #0x10
-
- cmp r7, #1
- addeq r11, r11, #24
- addeq r3, r3, #8
- addeq r0, r0, #3, 24
-
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
- str r1, [sp, #l_bc]
-
- add lr, r9, r7, lsl #2
-
- ldr r2, [r1, #bool_decoder_buffer]
- ldr r3, [r1, #bool_decoder_pos]
-
- ldr r10, [lr, #detok_coef_probs]
- ldr r5, [r1, #bool_decoder_count]
- ldr r6, [r1, #bool_decoder_range]
- ldr r4, [r1, #bool_decoder_value]
- add r8, r2, r3
-
- str r10, [sp, #l_coef_ptr]
-
-
- ;align 4
-BLOCK_LOOP
- ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
- ldr r2, [r9, #DETOK_A]
- ldr r1, [r9, #DETOK_L]
- ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
-
- cmp r7, #0 ; check type
- moveq r7, #1
- movne r7, #0
-
- ldr r0, [r2, +r12, lsl #2] ; a
- add r1, r1, r12, lsl #4
- add r3, r3, r11
-
- ldrb r2, [r3, #c_onyxblock2above_offset]
- ldrb r3, [r3, #c_onyxblock2left_offset]
- mov lr, #c_entropy_nodes
-;; ;++
-
- ldr r2, [r0, +r2, lsl #2]!
- add r3, r1, r3, lsl #2
- str r3, [sp, #l_l_ptr]
- ldr r3, [r3]
-
- cmp r2, #0
- movne r2, #1
- cmp r3, #0
- addne r2, r2, #1
-
- str r0, [sp, #l_a_ptr]
- smlabb r0, r2, lr, r10
- mov r1, #0 ; t = 0
- str r7, [sp, #l_c]
-
- ;align 4
-COEFF_LOOP
- ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
- ldr lr, [r9, #detok_onyx_coef_tree_ptr]
-
-;;the following two lines are used if onyx_coef_bands_x is UINT16
-;; add r3, r3, r7, lsl #1
-;; ldrh r3, [r3]
-
-;;the following line is used if onyx_coef_bands_x is UINT8
- ldrb r3, [r7, +r3]
-
-
-;; ;++
-;; pld [r8]
- ;++
- add r0, r0, r3
-
- ;align 4
-get_token_loop
- ldrb r2, [r0, +r1, asr #1]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8] ;load cx data byte in stall slot
- ;++
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- addhs r1, r1, #1 ;t += 1
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- movlo r6, r2
-
-;;; ldrsbhs r1, [r1, +lr]
- ldrsb r1, [r1, +lr]
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN22@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
-
-;; use branch for short pipelines ???
-;; bgt |$LN22@decode_mb_to|
-
- addle r5, r5, #8
- rsble r3, r5, #8
- addle r8, r8, #1
- orrle r4, r4, r12, lsl r3
-
-;;|$LN22@decode_mb_to|
-
- cmp r1, #0
- bgt get_token_loop
-
- cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
- beq END_OF_BLOCK
-
- rsb lr, r1, #0 ;v = -t;
-
- cmp lr, #4 ;if(v > FOUR_TOKEN)
- ble SKIP_EXTRABITS
-
- ldr r3, [r9, #detok_teb_base_ptr]
- mov r11, #1
- add r7, r3, lr, lsl #4
-
- ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
- ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
-
-extrabits_loop
- add r3, r0, r7
-
- ldrb r2, [r3, #4]
- mov r3, r6, lsl #8
- sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
- mov r10, #1
-
- smlawb r2, r3, r2, r10
- ldrb r12, [r8]
- ;++
-
- subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r10 ;update value
- subhs r2, r6, r2 ;range = range - split
- addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
- movlo r6, r2 ;range = split
-
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN10@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3 ;range
- mov r4, r4, lsl r3 ;value
-
- addle r5, r5, #8
- addle r8, r8, #1
- rsble r3, r5, #8
- orrle r4, r4, r12, lsl r3
-
-;;|$LN10@decode_mb_to|
- subs r0, r0, #1
- bpl extrabits_loop
-
-
-SKIP_EXTRABITS
- ldr r11, [sp, #l_qcoeff]
- ldr r0, [sp, #l_coef_ptr]
-
- cmp r1, #0 ;check for nonzero token
- beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table chec
-
- sub r3, r6, #1 ;range - 1
- ;++
- mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
- ;++
- mov r3, r3, lsr #8
- add r2, r3, #1 ;split
-
- subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
- movhs r4, r3 ;update value
- subhs r2, r6, r2 ;range = range - split
- mvnhs r3, lr
- addhs lr, r3, #1 ;v = (v ^ -1) + 1
- movlo r6, r2 ;range = split
-
-;; use branch for short pipelines ???
-;; cmp r2, #0x80
-;; bcs |$LN6@decode_mb_to|
-
- clz r3, r2
- sub r3, r3, #24
- subs r5, r5, r3
- mov r6, r2, lsl r3
- mov r4, r4, lsl r3
- ldrleb r2, [r8], #1
- addle r5, r5, #8
- rsble r3, r5, #8
- orrle r4, r4, r2, lsl r3
-
-;;|$LN6@decode_mb_to|
- add r0, r0, #0xB
-
- cmn r1, #1
-
- addlt r0, r0, #0xB
-
- mvn r1, #1
-
-SKIP_EOB_CHECK
- ldr r7, [sp, #l_c]
- ldr r3, [r9, #detok_scan]
- add r1, r1, #2
- cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
-
- ldr r3, [r3, +r7, lsl #2]
- add r7, r7, #1
- add r3, r11, r3, lsl #1
-
- str r7, [sp, #l_c]
- strh lr, [r3]
-
- blt COEFF_LOOP
-
- sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
- ldr r3, [sp, #l_type]
- ldr r10, [sp, #l_coef_ptr]
- ldr r0, [sp, #l_qcoeff]
- ldr r11, [sp, #l_i]
- ldr r12, [sp, #l_stop]
-
- cmp r3, #0
- moveq r1, #1
- movne r1, #0
- add r3, r11, r9
-
- cmp r7, r1
- strb r7, [r3, #detok_eob]
-
- ldr r7, [sp, #l_l_ptr]
- ldr r2, [sp, #l_a_ptr]
- movne r3, #1
- moveq r3, #0
-
- add r0, r0, #0x20
- add r11, r11, #1
- str r3, [r7]
- str r3, [r2]
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
-
- cmp r11, r12 ;i >= stop ?
- ldr r7, [sp, #l_type]
- mov lr, #0xB
-
- blt BLOCK_LOOP
-
- cmp r11, #0x19
- bne ln2_decode_mb_to
-
- ldr r12, [r9, #detok_qcoeff_start_ptr]
- ldr r10, [r9, #detok_coef_probs]
- mov r7, #0
- mov r3, #0x10
- str r12, [sp, #l_qcoeff]
- str r7, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
-
- b BLOCK_LOOP
-
-ln2_decode_mb_to
- cmp r11, #0x10
- bne ln1_decode_mb_to
-
- ldr r10, [r9, #0x30]
-
- mov r7, #2
- mov r3, #0x18
-
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr]
- b BLOCK_LOOP
-
-ln1_decode_mb_to
- ldr r2, [sp, #l_bc]
- mov r0, #0
- nop
-
- ldr r3, [r2, #bool_decoder_buffer]
- str r5, [r2, #bool_decoder_count]
- str r4, [r2, #bool_decoder_value]
- sub r3, r8, r3
- str r3, [r2, #bool_decoder_pos]
- str r6, [r2, #bool_decoder_range]
-
- add sp, sp, #l_stacksize
- ldmia sp!, {r4 - r11, pc}
-
- ENDP ; |vp8_decode_mb_tokens_v5|
-
- END
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
deleted file mode 100644
index 455c83a9c..000000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
-#if HAVE_ARMV7
- pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dequant.idct = vp8_dequant_idct_neon;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
- pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dequant.idct = vp8_dequant_idct_v6;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 7ec62a3d8..ff3ffda97 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
index bba4d5dfb..1923be42a 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -1,29 +1,41 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_dequant_idct_neon|
+ EXPORT |vp8_dequant_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
+;void vp8_dequant_idct_neon(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
; r0 short *input,
; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-|vp8_dequant_idct_neon| PROC
+; r2 unsigned char *pred
+; r3 unsigned char *dest
+; sp int pitch
+; sp+4 int stride
+
+|vp8_dequant_idct_add_neon| PROC
vld1.16 {q3, q4}, [r0]
vld1.16 {q5, q6}, [r1]
+ ldr r1, [sp] ; pitch
+ vld1.32 {d14[0]}, [r2], r1
+ vld1.32 {d14[1]}, [r2], r1
+ vld1.32 {d15[0]}, [r2], r1
+ vld1.32 {d15[1]}, [r2]
+
+ ldr r1, [sp, #4] ; stride
- ldr r12, _didct_coeff_
+ ldr r12, _CONSTANTS_
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
vmul.i16 q2, q4, q6
@@ -41,14 +53,9 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
vqsub.s16 d10, d6, d9 ;c1
vqadd.s16 d11, d7, d8 ;d1
@@ -77,7 +84,7 @@
vshr.s16 q3, q3, #1
vshr.s16 q4, q4, #1
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+ vqadd.s16 q3, q3, q2
vqadd.s16 q4, q4, q2
vqsub.s16 d10, d6, d9 ;c1
@@ -95,34 +102,29 @@
vrshr.s16 d4, d4, #3
vrshr.s16 d5, d5, #3
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
vtrn.32 d2, d4
vtrn.32 d3, d5
vtrn.16 d2, d3
vtrn.16 d4, d5
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
+ vaddw.u8 q1, q1, d14
+ vaddw.u8 q2, q2, d15
- bx lr
+ vqmovun.s16 d0, q1
+ vqmovun.s16 d1, q2
+
+ vst1.32 {d0[0]}, [r3], r1
+ vst1.32 {d0[1]}, [r3], r1
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r3]
- ENDP
+ bx lr
-;-----------------
- AREA didct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_didct_coeff_
- DCD didct_coeff
-didct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
+ ENDP ; |vp8_dequant_idct_add_neon|
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2 DCD 0x8a8c8a8c
END
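
The rename from vp8_dequant_idct_neon to vp8_dequant_idct_add_neon reflects that the routine now folds the predictor add, saturation, and byte store into the transform instead of writing a residual block. A plain-C sketch of the combined operation, assuming the standard VP8 4x4 inverse transform with its 20091/35468 Q16 constants (all names here are illustrative, not the project's C reference):

/* Standard VP8 4x4 inverse transform: two passes, +4 >> 3 rounding. */
static void idct4x4_sketch(const short *in, short *out)
{
    const int K1 = 20091;   /* cospi8sqrt2minus1, Q16 */
    const int K2 = 35468;   /* sinpi8sqrt2, Q16 */
    int tmp[16];
    int i;

    for (i = 0; i < 4; i++)             /* columns */
    {
        int a1 = in[i] + in[i + 8];
        int b1 = in[i] - in[i + 8];
        int c1 = ((in[i + 4] * K2) >> 16)
                 - (in[i + 12] + ((in[i + 12] * K1) >> 16));
        int d1 = (in[i + 4] + ((in[i + 4] * K1) >> 16))
                 + ((in[i + 12] * K2) >> 16);

        tmp[i]      = a1 + d1;
        tmp[i + 4]  = b1 + c1;
        tmp[i + 8]  = b1 - c1;
        tmp[i + 12] = a1 - d1;
    }

    for (i = 0; i < 4; i++)             /* rows */
    {
        const int *r = tmp + 4 * i;
        int a1 = r[0] + r[2];
        int b1 = r[0] - r[2];
        int c1 = ((r[1] * K2) >> 16) - (r[3] + ((r[3] * K1) >> 16));
        int d1 = (r[1] + ((r[1] * K1) >> 16)) + ((r[3] * K2) >> 16);

        out[4 * i + 0] = (short)((a1 + d1 + 4) >> 3);
        out[4 * i + 1] = (short)((b1 + c1 + 4) >> 3);
        out[4 * i + 2] = (short)((b1 - c1 + 4) >> 3);
        out[4 * i + 3] = (short)((a1 - d1 + 4) >> 3);
    }
}

static void dequant_idct_add_sketch(const short *input, const short *dq,
                                    const unsigned char *pred,
                                    unsigned char *dest,
                                    int pitch, int stride)
{
    short coeffs[16], res[16];
    int i, r, c;

    for (i = 0; i < 16; i++)
        coeffs[i] = (short)(input[i] * dq[i]);   /* dequant: q[i] *= dq[i] */

    idct4x4_sketch(coeffs, res);

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
        {
            int v = res[r * 4 + c] + pred[r * pitch + c];
            dest[r * stride + c] =
                (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
}
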
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
deleted file mode 100644
index 3392f2c2b..000000000
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ /dev/null
@@ -1,133 +0,0 @@
-;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_dc_idct_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
-; r0 short *input,
-; r1 short *dq,
-; r2 short *output,
-; r3 int pitch,
-; (stack) int Dc
-|vp8_dequant_dc_idct_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- ldr r1, [sp] ;load Dc from stack
-
- ldr r12, _dcidct_coeff_
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
- vmov.16 d2[0], r1
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- ;d6 - c1:temp1
- ;d7 - d1:temp2
- ;d8 - d1:temp1
- ;d9 - c1:temp2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- add r1, r2, r3
- add r12, r1, r3
- add r0, r12, r3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vst1.16 {d2}, [r2]
- vst1.16 {d3}, [r1]
- vst1.16 {d4}, [r12]
- vst1.16 {d5}, [r0]
-
- bx lr
-
- ENDP
-
-;-----------------
- AREA dcidct4x4_dat, DATA, READWRITE ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_dcidct_coeff_
- DCD dcidct_coeff
-dcidct_coeff
- DCD 0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
- END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 1bde94607..c8e0c31f2 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
new file mode 100644
index 000000000..fe4f2e0d4
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+/* place these declarations here because we don't want to maintain them
+ * outside of this scope
+ */
+void idct_dequant_dc_full_2x_neon
+ (short *input, short *dq, unsigned char *pre, unsigned char *dst,
+ int stride, short *dc);
+void idct_dequant_dc_0_2x_neon
+ (short *dc, unsigned char *pre, unsigned char *dst, int stride);
+void idct_dequant_full_2x_neon
+ (short *q, short *dq, unsigned char *pre, unsigned char *dst,
+ int pitch, int stride);
+void idct_dequant_0_2x_neon
+ (short *q, short dq, unsigned char *pre, int pitch,
+ unsigned char *dst, int stride);
+
+void vp8_dequant_dc_idct_add_y_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (((short *)eobs)[0] & 0xfefe)
+ idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc);
+ else
+ idct_dequant_dc_0_2x_neon(dc, pre, dst, stride);
+
+ if (((short *)eobs)[1] & 0xfefe)
+ idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2);
+ else
+ idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (((short *)eobs)[0] & 0xfefe)
+ idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride);
+ else
+ idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride);
+
+ if (((short *)eobs)[1] & 0xfefe)
+ idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride);
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_neon
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ if (((short *)eobs)[0] & 0xfefe)
+ idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+ else
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+
+ if (((short *)eobs)[1] & 0xfefe)
+ idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+ else
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
+
+ q += 32;
+ pre += 32;
+
+ if (((short *)eobs)[2] & 0xfefe)
+ idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+ else
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+
+ if (((short *)eobs)[3] & 0xfefe)
+ idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+ else
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
+}
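
A note on the 0xfefe tests above: eobs holds one end-of-block count per 4x4 block, and a value of 0 or 1 means the block carries at most a DC coefficient. Reading two adjacent counts as one 16-bit value and masking off bit 0 of each byte therefore asks whether either block of the pair has coefficients beyond DC and needs the full transform. A byte-wise equivalent (a sketch; the short cast in the code above additionally assumes a little-endian target):

static int pair_needs_full_idct(const char *eobs)
{
    /* same as (((short *)eobs)[0] & 0xfefe) != 0 on little-endian:
     * x & 0xfe is nonzero exactly when x > 1 */
    return ((unsigned char)eobs[0] & 0xfe) || ((unsigned char)eobs[1] & 0xfe);
}
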
diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
new file mode 100644
index 000000000..456f8e1d4
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
@@ -0,0 +1,79 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_0_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *pre,
+; int pitch, unsigned char *dst, int stride);
+; r0 *q
+; r1 dq
+; r2 *pre
+; r3 pitch
+; sp *dst
+; sp+4 stride
+|idct_dequant_0_2x_neon| PROC
+ add r12, r2, #4
+ vld1.32 {d2[0]}, [r2], r3
+ vld1.32 {d2[1]}, [r2], r3
+ vld1.32 {d4[0]}, [r2], r3
+ vld1.32 {d4[1]}, [r2]
+ vld1.32 {d8[0]}, [r12], r3
+ vld1.32 {d8[1]}, [r12], r3
+ vld1.32 {d10[0]}, [r12], r3
+ vld1.32 {d10[1]}, [r12]
+
+ ldrh r12, [r0] ; lo q
+ ldrh r2, [r0, #32] ; hi q
+ mov r3, #0
+ strh r3, [r0]
+ strh r3, [r0, #32]
+
+ sxth r12, r12 ; lo
+ mul r0, r12, r1
+ add r0, r0, #4
+ asr r0, r0, #3
+ vdup.16 q0, r0
+ sxth r2, r2 ; hi
+ mul r0, r2, r1
+ add r0, r0, #4
+ asr r0, r0, #3
+ vdup.16 q3, r0
+
+ vaddw.u8 q1, q0, d2 ; lo
+ vaddw.u8 q2, q0, d4
+ vaddw.u8 q4, q3, d8 ; hi
+ vaddw.u8 q5, q3, d10
+
+ ldr r2, [sp] ; dst
+ ldr r3, [sp, #4] ; stride
+
+ vqmovun.s16 d2, q1 ; lo
+ vqmovun.s16 d4, q2
+ vqmovun.s16 d8, q4 ; hi
+ vqmovun.s16 d10, q5
+
+ add r0, r2, #4
+ vst1.32 {d2[0]}, [r2], r3 ; lo
+ vst1.32 {d2[1]}, [r2], r3
+ vst1.32 {d4[0]}, [r2], r3
+ vst1.32 {d4[1]}, [r2]
+ vst1.32 {d8[0]}, [r0], r3 ; hi
+ vst1.32 {d8[1]}, [r0], r3
+ vst1.32 {d10[0]}, [r0], r3
+ vst1.32 {d10[1]}, [r0]
+
+ bx lr
+
+ ENDP ; |idct_dequant_0_2x_neon|
+ END
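
idct_dequant_0_2x_neon covers the pairs that fail that test: with at most the DC coefficient present, the whole 4x4 inverse transform collapses to a single constant, (q[0] * dq + 4) >> 3, added to every predictor pixel. A single-block C sketch of what the routine does twice at once (names illustrative):

static void idct_dequant_0_sketch(short *q, short dq,
                                  const unsigned char *pre, int pitch,
                                  unsigned char *dst, int stride)
{
    int dc = (q[0] * dq + 4) >> 3;
    int r, c;

    q[0] = 0;                           /* the asm zeroes the coefficient */

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
        {
            int v = pre[r * pitch + c] + dc;
            dst[r * stride + c] =
                (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
}
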
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
new file mode 100644
index 000000000..0dc036acb
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
@@ -0,0 +1,69 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_dc_0_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre,
+; unsigned char *dst, int stride);
+; r0 *dc
+; r1 *pre
+; r2 *dst
+; r3 stride
+|idct_dequant_dc_0_2x_neon| PROC
+ ldr r0, [r0] ; *dc
+ mov r12, #16
+
+ vld1.32 {d2[0]}, [r1], r12 ; lo
+ vld1.32 {d2[1]}, [r1], r12
+ vld1.32 {d4[0]}, [r1], r12
+ vld1.32 {d4[1]}, [r1]
+ sub r1, r1, #44
+ vld1.32 {d8[0]}, [r1], r12 ; hi
+ vld1.32 {d8[1]}, [r1], r12
+ vld1.32 {d10[0]}, [r1], r12
+ vld1.32 {d10[1]}, [r1]
+
+ sxth r1, r0 ; lo *dc
+ add r1, r1, #4
+ asr r1, r1, #3
+ vdup.16 q0, r1
+ sxth r0, r0, ror #16 ; hi *dc
+ add r0, r0, #4
+ asr r0, r0, #3
+ vdup.16 q3, r0
+
+ vaddw.u8 q1, q0, d2 ; lo
+ vaddw.u8 q2, q0, d4
+ vaddw.u8 q4, q3, d8 ; hi
+ vaddw.u8 q5, q3, d10
+
+ vqmovun.s16 d2, q1 ; lo
+ vqmovun.s16 d4, q2
+ vqmovun.s16 d8, q4 ; hi
+ vqmovun.s16 d10, q5
+
+ add r0, r2, #4
+ vst1.32 {d2[0]}, [r2], r3 ; lo
+ vst1.32 {d2[1]}, [r2], r3
+ vst1.32 {d4[0]}, [r2], r3
+ vst1.32 {d4[1]}, [r2]
+ vst1.32 {d8[0]}, [r0], r3 ; hi
+ vst1.32 {d8[1]}, [r0], r3
+ vst1.32 {d10[0]}, [r0], r3
+ vst1.32 {d10[1]}, [r0]
+
+ bx lr
+
+ ENDP ;|idct_dequant_dc_0_2x_neon|
+ END
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
new file mode 100644
index 000000000..ad4364adc
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
@@ -0,0 +1,206 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_dc_full_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_full_2x_neon(short *q, short *dq, unsigned char *pre,
+; unsigned char *dst, int stride, short *dc);
+; r0 *q,
+; r1 *dq,
+; r2 *pre
+; r3 *dst
+; sp stride
+; sp+4 *dc
+|idct_dequant_dc_full_2x_neon| PROC
+ vld1.16 {q0, q1}, [r1] ; dq (same l/r)
+ vld1.16 {q2, q3}, [r0] ; l q
+ mov r1, #16 ; pitch
+ add r0, r0, #32
+ vld1.16 {q4, q5}, [r0] ; r q
+ add r12, r2, #4
+ ; interleave the predictors
+ vld1.32 {d28[0]}, [r2], r1 ; l pre
+ vld1.32 {d28[1]}, [r12], r1 ; r pre
+ vld1.32 {d29[0]}, [r2], r1
+ vld1.32 {d29[1]}, [r12], r1
+ vld1.32 {d30[0]}, [r2], r1
+ vld1.32 {d30[1]}, [r12], r1
+ vld1.32 {d31[0]}, [r2]
+ ldr r1, [sp, #4]
+ vld1.32 {d31[1]}, [r12]
+
+ ldr r2, _CONSTANTS_
+
+ ldrh r12, [r1], #2 ; lo *dc
+ ldrh r1, [r1] ; hi *dc
+
+ ; dequant: q[i] = q[i] * dq[i]
+ vmul.i16 q2, q2, q0
+ vmul.i16 q3, q3, q1
+ vmul.i16 q4, q4, q0
+ vmul.i16 q5, q5, q1
+
+ ; move dc up to neon and overwrite first element
+ vmov.16 d4[0], r12
+ vmov.16 d8[0], r1
+
+ vld1.16 {d0}, [r2]
+
+ ; q2: l0r0 q3: l8r8
+ ; q4: l4r4 q5: l12r12
+ vswp d5, d8
+ vswp d7, d10
+
+ ; _CONSTANTS_ * 4,12 >> 16
+ ; q6: 4 * sinpi : c1/temp1
+ ; q7: 12 * sinpi : d1/temp2
+ ; q8: 4 * cospi
+ ; q9: 12 * cospi
+ vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q7, q5, d0[2]
+ vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q9, q5, d0[0]
+
+ vqadd.s16 q10, q2, q3 ; a1 = 0 + 8
+ vqsub.s16 q11, q2, q3 ; b1 = 0 - 8
+
+ ; vqdmulh only accepts signed values. this was a problem because
+ ; our constant had the high bit set, and was treated as a negative value.
+ ; vqdmulh also doubles the value before it shifts by 16. we need to
+ ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+ ; so we can shift the constant without losing precision. this avoids
+ ; shift again afterward, but also avoids the sign issue. win win!
+ ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+ ; pre-shift it
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ ; q4: 4 + 4 * cospi : d1/temp1
+ ; q5: 12 + 12 * cospi : c1/temp2
+ vqadd.s16 q4, q4, q8
+ vqadd.s16 q5, q5, q9
+
+ ; c1 = temp1 - temp2
+ ; d1 = temp1 + temp2
+ vqsub.s16 q2, q6, q5
+ vqadd.s16 q3, q4, q7
+
+ ; [0]: a1+d1
+ ; [1]: b1+c1
+ ; [2]: b1-c1
+ ; [3]: a1-d1
+ vqadd.s16 q4, q10, q3
+ vqadd.s16 q5, q11, q2
+ vqsub.s16 q6, q11, q2
+ vqsub.s16 q7, q10, q3
+
+ ; rotate
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+ ; idct loop 2
+ ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+ ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+ ; q6: l 2, 6,10,14 r 2, 6,10,14
+ ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+ ; q8: 1 * sinpi : c1/temp1
+ ; q9: 3 * sinpi : d1/temp2
+ ; q10: 1 * cospi
+ ; q11: 3 * cospi
+ vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q9, q7, d0[2]
+ vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q11, q7, d0[0]
+
+ vqadd.s16 q2, q4, q6 ; a1 = 0 + 2
+ vqsub.s16 q3, q4, q6 ; b1 = 0 - 2
+
+ ; see note on shifting above
+ vshr.s16 q10, q10, #1
+ vshr.s16 q11, q11, #1
+
+ ; q10: 1 + 1 * cospi : d1/temp1
+ ; q11: 3 + 3 * cospi : c1/temp2
+ vqadd.s16 q10, q5, q10
+ vqadd.s16 q11, q7, q11
+
+ ; q8: c1 = temp1 - temp2
+ ; q9: d1 = temp1 + temp2
+ vqsub.s16 q8, q8, q11
+ vqadd.s16 q9, q10, q9
+
+ ; a1+d1
+ ; b1+c1
+ ; b1-c1
+ ; a1-d1
+ vqadd.s16 q4, q2, q9
+ vqadd.s16 q5, q3, q8
+ vqsub.s16 q6, q3, q8
+ vqsub.s16 q7, q2, q9
+
+ ; +4 >> 3 (rounding)
+ vrshr.s16 q4, q4, #3 ; lo
+ vrshr.s16 q5, q5, #3
+ vrshr.s16 q6, q6, #3 ; hi
+ vrshr.s16 q7, q7, #3
+
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+
+ ; adding pre
+ ; input is still packed. pre was read interleaved
+ vaddw.u8 q4, q4, d28
+ vaddw.u8 q5, q5, d29
+ vaddw.u8 q6, q6, d30
+ vaddw.u8 q7, q7, d31
+
+ vmov.i16 q14, #0
+ vmov q15, q14
+ vst1.16 {q14, q15}, [r0] ; write over high input
+ sub r0, r0, #32
+ vst1.16 {q14, q15}, [r0] ; write over low input
+
+ ;saturate and narrow
+ vqmovun.s16 d0, q4 ; lo
+ vqmovun.s16 d1, q5
+ vqmovun.s16 d2, q6 ; hi
+ vqmovun.s16 d3, q7
+
+ ldr r1, [sp] ; stride
+ add r2, r3, #4 ; hi
+ vst1.32 {d0[0]}, [r3], r1 ; lo
+ vst1.32 {d0[1]}, [r2], r1 ; hi
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r2], r1
+ vst1.32 {d2[0]}, [r3], r1
+ vst1.32 {d2[1]}, [r2], r1
+ vst1.32 {d3[0]}, [r3]
+ vst1.32 {d3[1]}, [r2]
+
+ bx lr
+
+ ENDP ; |idct_dequant_dc_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this
+sinpi8sqrt2 DCD 0x4546
+
+ END
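
The constant-pool comment above ("we can pre-shift this") is worth unpacking: vqdmulh on s16 lanes returns (2*a*b) >> 16 with saturation, and sinpi8sqrt2 = 0x8a8c would read as a negative 16-bit value. Because its low bit is 0 it can be stored halved as 0x4546, letting vqdmulh's doubling restore it exactly; cospi8sqrt2minus1 = 0x4e7b has its low bit set, so it is kept intact and the doubled product is halved afterwards with vshr #1, which is also exact under arithmetic shifts. A small self-checking C sketch of both identities (names illustrative; assumes arithmetic >> on negative values, as on the ARM targets here):

#include <assert.h>
#include <stdint.h>

/* model of vqdmulh.s16, away from the lone saturating case a == b == -32768 */
static int16_t vqdmulh_s16(int16_t a, int16_t b)
{
    return (int16_t)((2 * (int32_t)a * (int32_t)b) >> 16);
}

int main(void)
{
    int16_t x;

    for (x = -512; x <= 512; x++)
    {
        /* sinpi8sqrt2: constant pre-shifted to 0x4546; doubling restores it */
        assert(vqdmulh_s16(x, 0x4546) ==
               (int16_t)(((int32_t)x * 0x8a8c) >> 16));

        /* cospi8sqrt2minus1: post-shift the doubled product instead */
        assert((int16_t)(vqdmulh_s16(x, 0x4e7b) >> 1) ==
               (int16_t)(((int32_t)x * 0x4e7b) >> 16));
    }

    return 0;
}
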
diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
new file mode 100644
index 000000000..85fff11b3
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
@@ -0,0 +1,198 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |idct_dequant_full_2x_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *pre,
+; unsigned char *dst, int pitch, int stride);
+; r0 *q,
+; r1 *dq,
+; r2 *pre
+; r3 *dst
+; sp pitch
+; sp+4 stride
+|idct_dequant_full_2x_neon| PROC
+ vld1.16 {q0, q1}, [r1] ; dq (same l/r)
+ vld1.16 {q2, q3}, [r0] ; l q
+ ldr r1, [sp] ; pitch
+ add r0, r0, #32
+ vld1.16 {q4, q5}, [r0] ; r q
+ add r12, r2, #4
+ ; interleave the predictors
+ vld1.32 {d28[0]}, [r2], r1 ; l pre
+ vld1.32 {d28[1]}, [r12], r1 ; r pre
+ vld1.32 {d29[0]}, [r2], r1
+ vld1.32 {d29[1]}, [r12], r1
+ vld1.32 {d30[0]}, [r2], r1
+ vld1.32 {d30[1]}, [r12], r1
+ vld1.32 {d31[0]}, [r2]
+ vld1.32 {d31[1]}, [r12]
+
+ ldr r2, _CONSTANTS_
+
+ ; dequant: q[i] = q[i] * dq[i]
+ vmul.i16 q2, q2, q0
+ vmul.i16 q3, q3, q1
+ vmul.i16 q4, q4, q0
+ vmul.i16 q5, q5, q1
+
+ vld1.16 {d0}, [r2]
+
+ ; q2: l0r0 q3: l8r8
+ ; q4: l4r4 q5: l12r12
+ vswp d5, d8
+ vswp d7, d10
+
+ ; _CONSTANTS_ * 4,12 >> 16
+ ; q6: 4 * sinpi : c1/temp1
+ ; q7: 12 * sinpi : d1/temp2
+ ; q8: 4 * cospi
+ ; q9: 12 * cospi
+ vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q7, q5, d0[2]
+ vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q9, q5, d0[0]
+
+ vqadd.s16 q10, q2, q3 ; a1 = 0 + 8
+ vqsub.s16 q11, q2, q3 ; b1 = 0 - 8
+
+ ; vqdmulh only accepts signed values. this was a problem because
+ ; our constant had the high bit set, and was treated as a negative value.
+ ; vqdmulh also doubles the value before it shifts by 16. we need to
+ ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+ ; so we can shift the constant without losing precision. this avoids
+ ; shift again afterward, but also avoids the sign issue. win win!
+ ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+ ; pre-shift it
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ ; q4: 4 + 4 * cospi : d1/temp1
+ ; q5: 12 + 12 * cospi : c1/temp2
+ vqadd.s16 q4, q4, q8
+ vqadd.s16 q5, q5, q9
+
+ ; c1 = temp1 - temp2
+ ; d1 = temp1 + temp2
+ vqsub.s16 q2, q6, q5
+ vqadd.s16 q3, q4, q7
+
+ ; [0]: a1+d1
+ ; [1]: b1+c1
+ ; [2]: b1-c1
+ ; [3]: a1-d1
+ vqadd.s16 q4, q10, q3
+ vqadd.s16 q5, q11, q2
+ vqsub.s16 q6, q11, q2
+ vqsub.s16 q7, q10, q3
+
+ ; rotate
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+ ; idct loop 2
+ ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+ ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+ ; q6: l 2, 6,10,14 r 2, 6,10,14
+ ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+ ; q8: 1 * sinpi : c1/temp1
+ ; q9: 3 * sinpi : d1/temp2
+ ; q10: 1 * cospi
+ ; q11: 3 * cospi
+ vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2
+ vqdmulh.s16 q9, q7, d0[2]
+ vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1
+ vqdmulh.s16 q11, q7, d0[0]
+
+ vqadd.s16 q2, q4, q6 ; a1 = 0 + 2
+ vqsub.s16 q3, q4, q6 ; b1 = 0 - 2
+
+ ; see note on shifting above
+ vshr.s16 q10, q10, #1
+ vshr.s16 q11, q11, #1
+
+ ; q10: 1 + 1 * cospi : d1/temp1
+ ; q11: 3 + 3 * cospi : c1/temp2
+ vqadd.s16 q10, q5, q10
+ vqadd.s16 q11, q7, q11
+
+ ; q8: c1 = temp1 - temp2
+ ; q9: d1 = temp1 + temp2
+ vqsub.s16 q8, q8, q11
+ vqadd.s16 q9, q10, q9
+
+ ; a1+d1
+ ; b1+c1
+ ; b1-c1
+ ; a1-d1
+ vqadd.s16 q4, q2, q9
+ vqadd.s16 q5, q3, q8
+ vqsub.s16 q6, q3, q8
+ vqsub.s16 q7, q2, q9
+
+ ; +4 >> 3 (rounding)
+ vrshr.s16 q4, q4, #3 ; lo
+ vrshr.s16 q5, q5, #3
+ vrshr.s16 q6, q6, #3 ; hi
+ vrshr.s16 q7, q7, #3
+
+ vtrn.32 q4, q6
+ vtrn.32 q5, q7
+ vtrn.16 q4, q5
+ vtrn.16 q6, q7
+
+ ; adding pre
+ ; input is still packed. pre was read interleaved
+ vaddw.u8 q4, q4, d28
+ vaddw.u8 q5, q5, d29
+ vaddw.u8 q6, q6, d30
+ vaddw.u8 q7, q7, d31
+
+ vmov.i16 q14, #0
+ vmov q15, q14
+ vst1.16 {q14, q15}, [r0] ; write over high input
+ sub r0, r0, #32
+ vst1.16 {q14, q15}, [r0] ; write over low input
+
+ ;saturate and narrow
+ vqmovun.s16 d0, q4 ; lo
+ vqmovun.s16 d1, q5
+ vqmovun.s16 d2, q6 ; hi
+ vqmovun.s16 d3, q7
+
+ ldr r1, [sp, #4] ; stride
+ add r2, r3, #4 ; hi
+ vst1.32 {d0[0]}, [r3], r1 ; lo
+ vst1.32 {d0[1]}, [r2], r1 ; hi
+ vst1.32 {d1[0]}, [r3], r1
+ vst1.32 {d1[1]}, [r2], r1
+ vst1.32 {d2[0]}, [r3], r1
+ vst1.32 {d2[1]}, [r2], r1
+ vst1.32 {d3[0]}, [r3]
+ vst1.32 {d3[1]}, [r2]
+
+ bx lr
+
+ ENDP ; |idct_dequant_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_ DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this
+sinpi8sqrt2 DCD 0x4546
+
+ END
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 442054ed3..57cba16a3 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,7 +13,7 @@
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
-DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
+DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -25,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
};
-static void copy_in(BOOL_DECODER *br, unsigned int to_write)
-{
- if (to_write > br->user_buffer_sz)
- to_write = br->user_buffer_sz;
-
- memcpy(br->write_ptr, br->user_buffer, to_write);
- br->user_buffer += to_write;
- br->user_buffer_sz -= to_write;
- br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
-}
-
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
unsigned int source_sz)
{
- br->lowvalue = 0;
+ br->user_buffer_end = source+source_sz;
+ br->user_buffer = source;
+ br->value = 0;
+ br->count = -8;
br->range = 255;
- br->count = 0;
- br->user_buffer = source;
- br->user_buffer_sz = source_sz;
if (source_sz && !source)
return 1;
- /* Allocate the ring buffer backing store with alignment equal to the
- * buffer size*2 so that a single pointer can be used for wrapping rather
- * than a pointer+offset.
- */
- br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
- VP8_BOOL_DECODER_SZ);
-
- if (!br->decode_buffer)
- return 1;
-
/* Populate the buffer */
- br->read_ptr = br->decode_buffer;
- br->write_ptr = br->decode_buffer;
- copy_in(br, VP8_BOOL_DECODER_SZ);
+ vp8dx_bool_decoder_fill_c(br);
- /* Read the first byte */
- br->value = (*br->read_ptr++) << 8;
return 0;
}
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
{
- int left, right;
-
- /* Find available room in the buffer */
- left = 0;
- right = br->read_ptr - br->write_ptr;
-
- if (right < 0)
- {
- /* Read pointer is behind the write pointer. We can write from the
- * write pointer to the end of the buffer.
- */
- right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
- left = br->read_ptr - br->decode_buffer;
- }
-
- if (right + left < 128)
- return;
-
- if (right)
- copy_in(br, right);
-
- if (left)
- {
- br->write_ptr = br->decode_buffer;
- copy_in(br, left);
- }
-
-}
-
-
-void vp8dx_stop_decode_c(BOOL_DECODER *bc)
-{
- vpx_free(bc->decode_buffer);
- bc->decode_buffer = 0;
+ const unsigned char *bufptr;
+ const unsigned char *bufend;
+ VP8_BD_VALUE value;
+ int count;
+ bufend = br->user_buffer_end;
+ bufptr = br->user_buffer;
+ value = br->value;
+ count = br->count;
+
+ VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
+ br->user_buffer = bufptr;
+ br->value = value;
+ br->count = count;
}
#if 0
@@ -119,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
{
unsigned int bit=0;
+ VP8_BD_VALUE value;
unsigned int split;
- unsigned int bigsplit;
- register unsigned int range = br->range;
- register unsigned int value = br->value;
+ VP8_BD_VALUE bigsplit;
+ int count;
+ unsigned int range;
+
+ value = br->value;
+ count = br->count;
+ range = br->range;
split = 1 + (((range-1) * probability) >> 8);
- bigsplit = (split<<8);
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
if(value >= bigsplit)
@@ -143,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
}*/
{
- int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
- if(count <= 0)
- {
- value |= (*br->read_ptr) << (-count);
- br->read_ptr = br_ptr_advance(br->read_ptr, 1);
- count += 8 ;
- }
- br->count = count;
}
br->value = value;
+ br->count = count;
br->range = range;
+ if (count < 0)
+ vp8dx_bool_decoder_fill_c(br);
return bit;
}
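
The widened decoder above moves the split point into the top byte of a
size_t-sized window. A worked example of that arithmetic (a standalone sketch;
the BD_VALUE names are local stand-ins for the VP8_BD_VALUE types in the diff):

    #include <limits.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef size_t BD_VALUE;
    #define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)

    int main(void)
    {
        unsigned int range = 255, probability = 128;
        unsigned int split = 1 + (((range - 1) * probability) >> 8); /* = 128 */

        /* The old 8-bit window compared value against split << 8; with a
         * wide window the split must sit in the topmost byte instead. */
        BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - 8);

        printf("split=%u, bigsplit top byte=0x%02x\n",
               split, (unsigned)(bigsplit >> (BD_VALUE_SIZE - 8)));
        return 0;
    }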
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 772dbdb2e..c851aa7e5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,60 +1,41 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef DBOOLHUFF_H
#define DBOOLHUFF_H
+#include <stddef.h>
+#include <limits.h>
#include "vpx_ports/config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-/* Size of the bool decoder backing storage
- *
- * This size was chosen to be greater than the worst case encoding of a
- * single macroblock. This was calcluated as follows (python):
- *
- * def max_cost(prob):
- * return max(prob_costs[prob], prob_costs[255-prob]) / 256;
- *
- * tree_nodes_cost = 7 * max_cost(255)
- * extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
- * sign_bit_cost = max_cost(128)
- * total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
- *
- * where the prob_costs table was taken from the C vp8_prob_cost table in
- * boolhuff.c and the extra_bits table was taken from the 11 extrabits for
- * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
- *
- * This equation produced a maximum of 79 bits per coefficient. Scaling up
- * to the macroblock level:
- *
- * 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
- *
- * 4096 bytes = 32768 bits > 31600
- */
-#define VP8_BOOL_DECODER_SZ 4096
-#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
-#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
+typedef size_t VP8_BD_VALUE;
+
+# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+ loaded as an immediate (on platforms like ARM, for example).
+ Even relatively modest values like 100 would work fine.*/
+# define VP8_LOTS_OF_BITS (0x40000000)
+
+
struct vp8_dboolhuff_rtcd_vtable;
typedef struct
{
- unsigned int lowvalue;
- unsigned int range;
- unsigned int value;
- int count;
+ const unsigned char *user_buffer_end;
const unsigned char *user_buffer;
- unsigned int user_buffer_sz;
- unsigned char *decode_buffer;
- const unsigned char *read_ptr;
- unsigned char *write_ptr;
+ VP8_BD_VALUE value;
+ int count;
+ unsigned int range;
#if CONFIG_RUNTIME_CPU_DETECT
struct vp8_dboolhuff_rtcd_vtable *rtcd;
#endif
@@ -62,10 +43,9 @@ typedef struct
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
const unsigned char *source, unsigned int source_sz)
-#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
-#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
+#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
#if ARCH_ARM
#include "arm/dboolhuff_arm.h"
@@ -75,10 +55,6 @@ typedef struct
#define vp8_dbool_start vp8dx_start_decode_c
#endif
-#ifndef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_c
-#endif
-
#ifndef vp8_dbool_fill
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
#endif
@@ -92,48 +68,35 @@ typedef struct
#endif
extern prototype_dbool_start(vp8_dbool_start);
-extern prototype_dbool_stop(vp8_dbool_stop);
extern prototype_dbool_fill(vp8_dbool_fill);
extern prototype_dbool_debool(vp8_dbool_debool);
extern prototype_dbool_devalue(vp8_dbool_devalue);
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
-typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
typedef struct vp8_dboolhuff_rtcd_vtable {
vp8_dbool_start_fn_t start;
- vp8_dbool_stop_fn_t stop;
vp8_dbool_fill_fn_t fill;
vp8_dbool_debool_fn_t debool;
vp8_dbool_devalue_fn_t devalue;
} vp8_dboolhuff_rtcd_vtable_t;
-// There are no processor-specific versions of these
-// functions right now. Disable RTCD to avoid using
-// function pointers which gives a speed boost
-//#ifdef ENABLE_RUNTIME_CPU_DETECT
-//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
-//#define IF_RTCD(x) (x)
-//#else
+/* There are no processor-specific versions of these
+ * functions right now. Disable RTCD to avoid using
+ * function pointers which gives a speed boost
+ */
+/*#ifdef ENABLE_RUNTIME_CPU_DETECT
+#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
+#define IF_RTCD(x) (x)
+#else*/
#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
#define IF_RTCD(x) NULL
-//#endif
-
-static unsigned char *br_ptr_advance(const unsigned char *_ptr,
- unsigned int n)
-{
- uintptr_t ptr = (uintptr_t)_ptr;
-
- ptr += n;
- ptr &= VP8_BOOL_DECODER_PTR_MASK;
-
- return (void *)ptr;
-}
+/*#endif*/
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no
* penalty
@@ -146,12 +109,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
#endif
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
}
-static void vp8dx_stop_decode(BOOL_DECODER *br) {
- DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
-}
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
}
+
+/*The refill loop is used in several places, so define it in a macro to make
+ sure they're all consistent.
+ An inline function would be cleaner, but has a significant penalty, because
+ multiple BOOL_DECODER fields must be modified, and the compiler is not smart
+ enough to eliminate the stores to those fields and the subsequent reloads
+ from them when inlining the function.*/
+#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+ do \
+ { \
+ int shift; \
+ for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+ { \
+ if((_bufptr) >= (_bufend)) { \
+ (_count) = VP8_LOTS_OF_BITS; \
+ break; \
+ } \
+ (_count) += 8; \
+ (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
+ shift -= 8; \
+ } \
+ } \
+ while(0)
+
+
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
/*
* Until optimized versions of this function are available, we
@@ -160,13 +145,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
*/
unsigned int bit = 0;
+ VP8_BD_VALUE value;
unsigned int split;
- unsigned int bigsplit;
- register unsigned int range = br->range;
- register unsigned int value = br->value;
+ VP8_BD_VALUE bigsplit;
+ int count;
+ unsigned int range;
+
+ value = br->value;
+ count = br->count;
+ range = br->range;
split = 1 + (((range - 1) * probability) >> 8);
- bigsplit = (split << 8);
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
@@ -185,23 +175,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}*/
{
- int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
-
- if (count <= 0)
- {
- value |= (*br->read_ptr) << (-count);
- br->read_ptr = br_ptr_advance(br->read_ptr, 1);
- count += 8 ;
- }
-
- br->count = count;
}
br->value = value;
+ br->count = count;
br->range = range;
+ if(count < 0)
+ vp8dx_bool_decoder_fill(br);
return bit;
}
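
A worked run of the refill scheme implemented by VP8DX_BOOL_DECODER_FILL above
may help. This standalone sketch mirrors the macro's logic with local names
(fill, BD_VALUE, LOTS_OF_BITS) and feeds it the three-byte frame sync code as
sample input:

    #include <limits.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef size_t BD_VALUE;
    #define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)
    #define LOTS_OF_BITS  0x40000000

    /* Bytes are OR'ed in at decreasing shifts until the window is full or
     * the buffer runs dry, in which case count is set huge so the decoder
     * never asks for more input. */
    static void fill(BD_VALUE *value, int *count,
                     const unsigned char **bufptr, const unsigned char *bufend)
    {
        int shift = BD_VALUE_SIZE - 8 - (*count + 8);

        while (shift >= 0) {
            if (*bufptr >= bufend) {
                *count = LOTS_OF_BITS;
                break;
            }
            *count += 8;
            *value |= (BD_VALUE)*(*bufptr)++ << shift;
            shift -= 8;
        }
    }

    int main(void)
    {
        const unsigned char buf[] = { 0x9d, 0x01, 0x2a };  /* VP8 sync code */
        const unsigned char *p = buf;
        BD_VALUE value = 0;
        int count = -8;               /* matches vp8dx_start_decode_c above */

        fill(&value, &count, &p, buf + sizeof(buf));
        printf("count=%d, top byte=0x%02x\n",
               count, (unsigned)(value >> (BD_VALUE_SIZE - 8)));
        return 0;
    }

With count starting at -8, the first byte lands in the window's topmost byte,
exactly where the decode loop expects the next bits to be.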
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 6035f3e6a..203d72dd2 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,10 +14,127 @@
#include "entropymode.h"
#include "onyxd_int.h"
#include "findnearmv.h"
-#include "demode.h"
+
#if CONFIG_DEBUG
#include <assert.h>
#endif
+static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+{
+ const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
+
+ return i;
+}
+
+
+static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+ const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
+
+ return i;
+}
+
+static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+ const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
+
+ return i;
+}
+
+
+
+static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+{
+ const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
+
+ return i;
+}
+
+static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+ /* Is segmentation enabled */
+ if (x->segmentation_enabled && x->update_mb_segmentation_map)
+ {
+ /* If so then read the segment id. */
+ if (vp8_read(r, x->mb_segment_tree_probs[0]))
+ mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+ else
+ mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+ }
+}
+
+static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
+{
+ vp8_reader *const bc = & pbi->bc;
+ const int mis = pbi->common.mode_info_stride;
+
+ {
+ MB_PREDICTION_MODE y_mode;
+
+ /* By default on a key frame all MBs are reset to segment 0. Read the
+ * Macroblock segmentation map only if it is being updated explicitly this frame.
+ */
+ m->mbmi.segment_id = 0;
+
+ if (pbi->mb.update_mb_segmentation_map)
+ vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
+
+ /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
+ if (pbi->common.mb_no_coeff_skip)
+ m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+ else
+ m->mbmi.mb_skip_coeff = 0;
+
+ y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob);
+
+ m->mbmi.ref_frame = INTRA_FRAME;
+
+ if ((m->mbmi.mode = y_mode) == B_PRED)
+ {
+ int i = 0;
+
+ do
+ {
+ const B_PREDICTION_MODE A = vp8_above_bmi(m, i, mis)->mode;
+ const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
+
+ m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
+ }
+ while (++i < 16);
+ }
+ else
+ {
+ int BMode;
+ int i = 0;
+
+ switch (y_mode)
+ {
+ case DC_PRED:
+ BMode = B_DC_PRED;
+ break;
+ case V_PRED:
+ BMode = B_VE_PRED;
+ break;
+ case H_PRED:
+ BMode = B_HE_PRED;
+ break;
+ case TM_PRED:
+ BMode = B_TM_PRED;
+ break;
+ default:
+ BMode = B_DC_PRED;
+ break;
+ }
+
+ do
+ {
+ m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
+ }
+ while (++i < 16);
+ }
+
+ m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+ }
+}
static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
{
@@ -98,6 +216,8 @@ static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
return (MB_PREDICTION_MODE)i;
}
+
+#ifdef VPX_MODE_COUNT
unsigned int vp8_mv_cont_count[5][4] =
{
{ 0, 0, 0, 0 },
@@ -106,313 +226,333 @@ unsigned int vp8_mv_cont_count[5][4] =
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 }
};
+#endif
-void vp8_decode_mode_mvs(VP8D_COMP *pbi)
-{
- const MV Zero = { 0, 0};
-
- VP8_COMMON *const pc = & pbi->common;
- vp8_reader *const bc = & pbi->bc;
-
- MODE_INFO *mi = pc->mi, *ms;
- const int mis = pc->mode_info_stride;
+unsigned char vp8_mbsplit_offset[4][16] = {
+ { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
- MV_CONTEXT *const mvc = pc->fc.mvc;
+unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
+unsigned char vp8_mbsplit_fill_offset[4][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
+ { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
- int mb_row = -1;
- vp8_prob prob_intra;
- vp8_prob prob_last;
- vp8_prob prob_gf;
- vp8_prob prob_skip_false = 0;
- if (pc->mb_no_coeff_skip)
- prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
- prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
- prob_last = (vp8_prob)vp8_read_literal(bc, 8);
- prob_gf = (vp8_prob)vp8_read_literal(bc, 8);
+void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
+{
+ vp8_reader *const bc = & pbi->bc;
+ MV_CONTEXT *const mvc = pbi->common.fc.mvc;
- ms = pc->mi - 1;
+ pbi->prob_skip_false = 0;
+ if (pbi->common.mb_no_coeff_skip)
+ pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
- if (vp8_read_bit(bc))
+ if(pbi->common.frame_type != KEY_FRAME)
{
- int i = 0;
+ pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
+ pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8);
+ pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8);
- do
+ if (vp8_read_bit(bc))
{
- pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
- }
- while (++i < 4);
- }
+ int i = 0;
- if (vp8_read_bit(bc))
- {
- int i = 0;
+ do
+ {
+ pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ }
+ while (++i < 4);
+ }
- do
+ if (vp8_read_bit(bc))
{
- pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ int i = 0;
+
+ do
+ {
+ pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+ }
+ while (++i < 3);
}
- while (++i < 3);
- }
- read_mvcontexts(bc, mvc);
+ read_mvcontexts(bc, mvc);
+ }
+}
- while (++mb_row < pc->mb_rows)
+void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+ int mb_row, int mb_col)
+{
+ const MV Zero = { 0, 0};
+ vp8_reader *const bc = & pbi->bc;
+ MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+ const int mis = pbi->common.mode_info_stride;
+
+ MV *const mv = & mbmi->mv.as_mv;
+ int mb_to_left_edge;
+ int mb_to_right_edge;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
+
+ mb_to_top_edge = pbi->mb.mb_to_top_edge;
+ mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
+ mb_to_top_edge -= LEFT_TOP_MARGIN;
+ mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+
+ mbmi->need_to_clamp_mvs = 0;
+ /* Distance of Mb to the various image edges.
+ * These are specified in 1/8th pel units as they are always compared to MV values that are in 1/8th pel units
+ */
+ pbi->mb.mb_to_left_edge =
+ mb_to_left_edge = -((mb_col * 16) << 3);
+ mb_to_left_edge -= LEFT_TOP_MARGIN;
+
+ pbi->mb.mb_to_right_edge =
+ mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+ mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
+
+ /* If required read in new segmentation data for this MB */
+ if (pbi->mb.update_mb_segmentation_map)
+ vp8_read_mb_features(bc, mbmi, &pbi->mb);
+
+ /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
+ if (pbi->common.mb_no_coeff_skip)
+ mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+ else
+ mbmi->mb_skip_coeff = 0;
+
+ if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra))) /* inter MB */
{
- int mb_col = -1;
+ int rct[4];
+ vp8_prob mv_ref_p [VP8_MVREFS-1];
+ MV nearest, nearby, best_mv;
- while (++mb_col < pc->mb_cols)
+ if (vp8_read(bc, pbi->prob_last))
{
- MB_MODE_INFO *const mbmi = & mi->mbmi;
- MV *const mv = & mbmi->mv.as_mv;
- VP8_COMMON *const pc = &pbi->common;
- MACROBLOCKD *xd = &pbi->mb;
+ mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf)));
+ }
- vp8dx_bool_decoder_fill(bc);
+ vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+ vp8_mv_ref_probs(mv_ref_p, rct);
- // If required read in new segmentation data for this MB
- if (pbi->mb.update_mb_segmentation_map)
- vp8_read_mb_features(bc, mbmi, &pbi->mb);
+ mbmi->uv_mode = DC_PRED;
+ switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
+ {
+ case SPLITMV:
+ {
+ const int s = mbmi->partitioning =
+ vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs);
+ const int num_p = vp8_mbsplit_count [s];
+ int j = 0;
- // Read the macroblock coeff skip flag if this feature is in use, else default to 0
- if (pc->mb_no_coeff_skip)
- mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false);
- else
- mbmi->mb_skip_coeff = 0;
+ do /* for each subset j */
+ {
+ B_MODE_INFO bmi;
+ MV *const mv = & bmi.mv.as_mv;
- mbmi->uv_mode = DC_PRED;
+ int k; /* first block in subset j */
+ int mv_contz;
+ k = vp8_mbsplit_offset[s][j];
- if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra))) /* inter MB */
- {
- int rct[4];
- vp8_prob mv_ref_p [VP8_MVREFS-1];
- MV nearest, nearby, best_mv;
+ mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
- if (vp8_read(bc, prob_last))
+ switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
{
- mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf)));
+ case NEW4X4:
+ read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+ mv->row += best_mv.row;
+ mv->col += best_mv.col;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][3]++;
+ #endif
+ break;
+ case LEFT4X4:
+ *mv = vp8_left_bmi(mi, k)->mv.as_mv;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][0]++;
+ #endif
+ break;
+ case ABOVE4X4:
+ *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][1]++;
+ #endif
+ break;
+ case ZERO4X4:
+ *mv = Zero;
+ #ifdef VPX_MODE_COUNT
+ vp8_mv_cont_count[mv_contz][2]++;
+ #endif
+ break;
+ default:
+ break;
}
- vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
+ mbmi->need_to_clamp_mvs |= (mv->col < mb_to_left_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
- vp8_mv_ref_probs(mv_ref_p, rct);
-
- switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
- {
- case SPLITMV:
{
- const int s = mbmi->partitioning = vp8_treed_read(
- bc, vp8_mbsplit_tree, vp8_mbsplit_probs
- );
- const int num_p = vp8_mbsplit_count [s];
- const int *const L = vp8_mbsplits [s];
- int j = 0;
-
- do /* for each subset j */
- {
- B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
- MV *const mv = & bmi->mv.as_mv;
-
- int k = -1; /* first block in subset j */
- int mv_contz;
-
- while (j != L[++k])
- if (k >= 16)
-#if CONFIG_DEBUG
- assert(0);
+ /* Fill (uniform) modes, mvs of jth subset.
+ Must do it here because ensuing subsets can
+ refer back to us via "left" or "above". */
+ unsigned char *fill_offset;
+ unsigned int fill_count = vp8_mbsplit_fill_count[s];
-#else
- ;
-#endif
+ fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
- mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
+ do {
+ mi->bmi[ *fill_offset] = bmi;
+ fill_offset++;
- switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
- {
- case NEW4X4:
- read_mv(bc, mv, (const MV_CONTEXT *) mvc);
- mv->row += best_mv.row;
- mv->col += best_mv.col;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][3]++;
-#endif
- break;
- case LEFT4X4:
- *mv = vp8_left_bmi(mi, k)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][0]++;
-#endif
- break;
- case ABOVE4X4:
- *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][1]++;
-#endif
- break;
- case ZERO4X4:
- *mv = Zero;
-#ifdef VPX_MODE_COUNT
- vp8_mv_cont_count[mv_contz][2]++;
-#endif
- break;
- default:
- break;
- }
-
- /* Fill (uniform) modes, mvs of jth subset.
- Must do it here because ensuing subsets can
- refer back to us via "left" or "above". */
- do
- if (j == L[k])
- mi->bmi[k] = *bmi;
-
- while (++k < 16);
- }
- while (++j < num_p);
+ }while (--fill_count);
}
- *mv = mi->bmi[15].mv.as_mv;
-
- break; /* done with SPLITMV */
-
- case NEARMV:
- *mv = nearby;
-
- // Clip "next_nearest" so that it does not extend to far out of image
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
- goto propagate_mv;
+ }
+ while (++j < num_p);
+ }
- case NEARESTMV:
- *mv = nearest;
+ *mv = mi->bmi[15].mv.as_mv;
+
+ break; /* done with SPLITMV */
+
+ case NEARMV:
+ *mv = nearby;
+ /* Clip "next_nearest" so that it does not extend too far out of the image */
+ mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+ mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+ mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+ mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+ goto propagate_mv;
+
+ case NEARESTMV:
+ *mv = nearest;
+ /* Clip "next_nearest" so that it does not extend too far out of the image */
+ mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+ mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+ mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+ mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+ goto propagate_mv;
+
+ case ZEROMV:
+ *mv = Zero;
+ goto propagate_mv;
+
+ case NEWMV:
+ read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+ mv->row += best_mv.row;
+ mv->col += best_mv.col;
+
+ /* Don't need to check this on NEARMV and NEARESTMV modes
+ * since those modes clamp the MV. The NEWMV mode does not,
+ * so signal to the prediction stage whether special
+ * handling may be required.
+ */
+ mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+ mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+ propagate_mv: /* same MV throughout */
+ {
+ /*int i=0;
+ do
+ {
+ mi->bmi[i].mv.as_mv = *mv;
+ }
+ while( ++i < 16);*/
+
+ mi->bmi[0].mv.as_mv = *mv;
+ mi->bmi[1].mv.as_mv = *mv;
+ mi->bmi[2].mv.as_mv = *mv;
+ mi->bmi[3].mv.as_mv = *mv;
+ mi->bmi[4].mv.as_mv = *mv;
+ mi->bmi[5].mv.as_mv = *mv;
+ mi->bmi[6].mv.as_mv = *mv;
+ mi->bmi[7].mv.as_mv = *mv;
+ mi->bmi[8].mv.as_mv = *mv;
+ mi->bmi[9].mv.as_mv = *mv;
+ mi->bmi[10].mv.as_mv = *mv;
+ mi->bmi[11].mv.as_mv = *mv;
+ mi->bmi[12].mv.as_mv = *mv;
+ mi->bmi[13].mv.as_mv = *mv;
+ mi->bmi[14].mv.as_mv = *mv;
+ mi->bmi[15].mv.as_mv = *mv;
+ }
+ break;
+ default:;
+ #if CONFIG_DEBUG
+ assert(0);
+ #endif
+ }
+ }
+ else
+ {
+ /* MB is intra coded */
+ int j = 0;
+ do
+ {
+ mi->bmi[j].mv.as_mv = Zero;
+ }
+ while (++j < 16);
- // Clip "next_nearest" so that it does not extend to far out of image
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+ if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
+ {
+ j = 0;
+ do
+ {
+ mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+ }
+ while (++j < 16);
+ }
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+ mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
+ }
- goto propagate_mv;
+}
- case ZEROMV:
- *mv = Zero;
- goto propagate_mv;
+void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+{
+ MODE_INFO *mi = pbi->common.mi;
+ int mb_row = -1;
- case NEWMV:
- read_mv(bc, mv, (const MV_CONTEXT *) mvc);
- mv->row += best_mv.row;
- mv->col += best_mv.col;
- /* Encoder should not produce invalid motion vectors, but since
- * arbitrary length MVs can be parsed from the bitstream, we
- * need to clamp them here in case we're reading bad data to
- * avoid a crash.
- */
-#if CONFIG_DEBUG
- assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
- assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
- assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
- assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
-#endif
+ vp8_mb_mode_mv_init(pbi);
- if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
- mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
- mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
- if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
- mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
- mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
- propagate_mv: /* same MV throughout */
- {
- //int i=0;
- //do
- //{
- // mi->bmi[i].mv.as_mv = *mv;
- //}
- //while( ++i < 16);
-
- mi->bmi[0].mv.as_mv = *mv;
- mi->bmi[1].mv.as_mv = *mv;
- mi->bmi[2].mv.as_mv = *mv;
- mi->bmi[3].mv.as_mv = *mv;
- mi->bmi[4].mv.as_mv = *mv;
- mi->bmi[5].mv.as_mv = *mv;
- mi->bmi[6].mv.as_mv = *mv;
- mi->bmi[7].mv.as_mv = *mv;
- mi->bmi[8].mv.as_mv = *mv;
- mi->bmi[9].mv.as_mv = *mv;
- mi->bmi[10].mv.as_mv = *mv;
- mi->bmi[11].mv.as_mv = *mv;
- mi->bmi[12].mv.as_mv = *mv;
- mi->bmi[13].mv.as_mv = *mv;
- mi->bmi[14].mv.as_mv = *mv;
- mi->bmi[15].mv.as_mv = *mv;
- }
+ while (++mb_row < pbi->common.mb_rows)
+ {
+ int mb_col = -1;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
- break;
+ pbi->mb.mb_to_top_edge =
+ mb_to_top_edge = -((mb_row * 16)) << 3;
+ mb_to_top_edge -= LEFT_TOP_MARGIN;
- default:;
-#if CONFIG_DEBUG
- assert(0);
-#endif
- }
+ pbi->mb.mb_to_bottom_edge =
+ mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+ mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
- }
+ while (++mb_col < pbi->common.mb_cols)
+ {
+ /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
+ if(pbi->common.frame_type == KEY_FRAME)
+ vp8_kfread_modes(pbi, mi, mb_row, mb_col);
else
- {
- /* MB is intra coded */
-
- int j = 0;
-
- do
- {
- mi->bmi[j].mv.as_mv = Zero;
- }
- while (++j < 16);
-
- *mv = Zero;
-
- if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED)
- {
- int j = 0;
+ vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
- do
- {
- mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob);
- }
- while (++j < 16);
- }
-
- mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob);
- }
-
- mi++; // next macroblock
+ mi++; /* next macroblock */
}
- mi++; // skip left predictor each row
+ mi++; /* skip left predictor each row */
}
}
+
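
The vp8_mbsplit_fill_count / vp8_mbsplit_fill_offset tables added above let the
decoder copy a partition's B_MODE_INFO into its blocks with a straight slice
walk instead of the old linear scan of vp8_mbsplits. A standalone demo with the
tables copied from the diff (the loop and names around them are illustrative):

    #include <stdio.h>

    static const unsigned char fill_count[4] = {8, 8, 4, 1};
    static const unsigned char fill_offset[4][16] = {
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
        { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
        { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
    };

    int main(void)
    {
        int s = 2;  /* quarters: four partitions of four 4x4 blocks */
        int j;

        for (j = 0; j < 16 / fill_count[s]; j++) {
            /* partition j owns the contiguous slice starting here */
            const unsigned char *off = &fill_offset[s][j * fill_count[s]];
            unsigned int n = fill_count[s];

            printf("partition %d:", j);
            do {
                printf(" %d", *off++);
            } while (--n);
            printf("\n");
        }
        return 0;
    }

For s = 2 this prints the four 8x8 quadrants ({0,1,4,5}, {2,3,6,7}, ...),
matching what the do/while fill loop in the SPLITMV case writes into mi->bmi[].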
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 403007183..940342447 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index ebc5c27b2..25dee8fe8 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,11 +15,12 @@
#ifndef _DECODER_THREADING_H
#define _DECODER_THREADING_H
-
-extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
- MACROBLOCKD *xd);
-extern void vp8_stop_lfthread(VP8D_COMP *pbi);
-extern void vp8_start_lfthread(VP8D_COMP *pbi);
+#if CONFIG_MULTITHREAD
+extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
+extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
+extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
+#endif
+
#endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 4edf4f60d..1bdc3d946 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,9 +21,10 @@
#include "alloccommon.h"
#include "entropymode.h"
#include "quant_common.h"
-#include "segmentation_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/yv12extend.h"
#include "setupintrarecon.h"
-#include "demode.h"
+
#include "decodemv.h"
#include "extend.h"
#include "vpx_mem/vpx_mem.h"
@@ -38,56 +40,53 @@
void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
{
- int r, c;
int i;
int Q;
VP8_COMMON *const pc = & pbi->common;
for (Q = 0; Q < QINDEX_RANGE; Q++)
{
- pc->Y1dequant[Q][0][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
- pc->Y2dequant[Q][0][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
- pc->UVdequant[Q][0][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
+ pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
+ pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
+ pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
- // all the ac values = ;
+ /* all the ac values */
for (i = 1; i < 16; i++)
{
int rc = vp8_default_zig_zag1d[i];
- r = (rc >> 2);
- c = (rc & 3);
- pc->Y1dequant[Q][r][c] = (short)vp8_ac_yquant(Q);
- pc->Y2dequant[Q][r][c] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
- pc->UVdequant[Q][r][c] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
+ pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
+ pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+ pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
}
}
}
-static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
+void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
int i;
int QIndex;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
VP8_COMMON *const pc = & pbi->common;
- // Decide whether to use the default or alternate baseline Q value.
+ /* Decide whether to use the default or alternate baseline Q value. */
if (xd->segmentation_enabled)
{
- // Abs Value
+ /* Abs Value */
if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- // Delta Value
+ /* Delta Value */
else
{
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
}
else
QIndex = pc->base_qindex;
- // Set up the block level dequant pointers
+ /* Set up the block level dequant pointers */
for (i = 0; i < 16; i++)
{
xd->block[i].dequant = pc->Y1dequant[QIndex];
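
The dequantizer change above flattens the per-Q tables from short[4][4] to
short[16]: the old indices r = rc >> 2 and c = rc & 3 address exactly the same
memory as the flat index rc, so the block-level dequant pointers keep working.
A minimal sketch of that equivalence (placeholder values, not real quantizer
data):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        short old_layout[4][4], new_layout[16];
        int rc;

        for (rc = 0; rc < 16; rc++) {
            old_layout[rc >> 2][rc & 3] = (short)(100 + rc);  /* placeholder */
            new_layout[rc] = (short)(100 + rc);
        }

        /* row-major [rc >> 2][rc & 3] of a 4x4 == [rc] of a flat 16 */
        for (rc = 0; rc < 16; rc++)
            assert(old_layout[rc >> 2][rc & 3] == new_layout[rc]);

        printf("layouts agree\n");
        return 0;
    }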
@@ -108,11 +107,12 @@ static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
#define RTCD_VTABLE(x) NULL
#endif
-//skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
-// to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+/* skip_recon_mb() is modified: instead of writing the result to the predictor buffer and then copying it
+ * to the dst buffer, we write the result directly to the dst buffer. This eliminates an unnecessary copy.
+ */
static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
- if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8_build_intra_predictors_mbuv_s(xd);
@@ -125,42 +125,114 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
}
}
-static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+ /* If the MV points so far into the UMV border that no visible pixels
+ * are used for reconstruction, the subpel part of the MV can be
+ * discarded and the MV limited to 16 pixels with equivalent results.
+ *
+ * This limit kicks in at 19 pixels for the top and left edges, for
+ * the 16 pixels plus 3 taps right of the central pixel when subpel
+ * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+ * left of the central pixel when filtering.
+ */
+ if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+ mv->col = xd->mb_to_left_edge - (16 << 3);
+ else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+ mv->col = xd->mb_to_right_edge + (16 << 3);
+
+ if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+ mv->row = xd->mb_to_top_edge - (16 << 3);
+ else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+ mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
+/* A version of the above function for chroma block MVs.*/
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+ mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
+ mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
+
+ mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
+ mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
+}
+
+void clamp_mvs(MACROBLOCKD *xd)
{
- if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ {
+ int i;
+
+ for (i=0; i<16; i++)
+ clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+ for (i=16; i<24; i++)
+ clamp_uvmv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+ }
+ else
+ {
+ clamp_mv_to_umv_border(&xd->mode_info_context->mbmi.mv.as_mv, xd);
+ clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
+ }
+
+}
+
+void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+ int eobtotal = 0;
+ int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+
+ if (xd->mode_info_context->mbmi.mb_skip_coeff)
+ {
+ vp8_reset_mb_tokens_context(xd);
+ }
+ else
+ {
+ eobtotal = vp8_decode_mb_tokens(pbi, xd);
+ }
+
+ /* Perform temporary clamping of the MV to be used for prediction */
+ if (do_clamp)
+ {
+ clamp_mvs(xd);
+ }
+
+ xd->mode_info_context->mbmi.dc_diff = 1;
+
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ {
+ xd->mode_info_context->mbmi.dc_diff = 0;
+ skip_recon_mb(pbi, xd);
+ return;
+ }
+
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
+
+ /* do prediction */
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8_build_intra_predictors_mbuv(xd);
- if (xd->mbmi.mode != B_PRED)
+ if (xd->mode_info_context->mbmi.mode != B_PRED)
{
vp8_build_intra_predictors_mby_ptr(xd);
- vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
- }
- else
- {
- vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd);
+ } else {
+ vp8_intra_prediction_down_copy(xd);
}
}
else
{
vp8_build_inter_predictors_mb(xd);
- vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
}
-}
-
-static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
- int i;
- BLOCKD *b = &xd->block[24];
-
-
- if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+ /* dequantization and idct */
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
{
+ BLOCKD *b = &xd->block[24];
DEQUANT_INVOKE(&pbi->dequant, block)(b);
- // do 2nd order transform on the dc block
- if (b->eob > 1)
+ /* do 2nd order transform on the dc block */
+ if (xd->eobs[24] > 1)
{
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
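
The reasoning in clamp_mv_to_umv_border above is easier to check with numbers.
A scalar sketch of the left-edge case, in 1/8-pel units (illustrative values,
not library code):

    #include <stdio.h>

    int main(void)
    {
        int mb_to_left_edge = -((2 * 16) << 3);   /* MB in column 2: -256 */
        int mv_col = -500;                        /* points deep into the border */

        /* Once no visible pixel can be reached (19 px = 16 px + 3 filter
         * taps), the subpel part is irrelevant and a 16 px offset gives
         * identical reconstruction. */
        if (mv_col < mb_to_left_edge - (19 << 3))
            mv_col = mb_to_left_edge - (16 << 3);

        /* prints: clamped col = -384 (threshold -408) */
        printf("clamped col = %d (threshold %d)\n",
               mv_col, mb_to_left_edge - (19 << 3));
        return 0;
    }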
@@ -178,86 +250,50 @@ static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
((int *)b->qcoeff)[0] = 0;
}
-
- for (i = 0; i < 16; i++)
- {
-
- b = &xd->block[i];
-
- if (b->eob > 1)
- {
- DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]);
- }
- else
- {
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32);
- }
- }
-
- for (i = 16; i < 24; i++)
- {
- b = &xd->block[i];
-
- if (b->eob > 1)
- {
- DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16);
- }
- else
- {
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16);
- ((int *)b->qcoeff)[0] = 0;
- }
- }
+ DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs, xd->block[24].diff);
}
- else
+ else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
{
- for (i = 0; i < 24; i++)
+ for (i = 0; i < 16; i++)
{
- b = &xd->block[i];
+ BLOCKD *b = &xd->block[i];
+ vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
- if (b->eob > 1)
+ if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16)));
+ DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ (b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
}
else
{
- IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16)));
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+ (b->qcoeff[0] * b->dequant[0], b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
}
}
- }
-}
-
-void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
- int eobtotal = 0;
- if (xd->mbmi.mb_skip_coeff)
- {
- vp8_reset_mb_tokens_context(xd);
}
else
{
- eobtotal = vp8_decode_mb_tokens(pbi, xd);
- }
-
- xd->mode_info_context->mbmi.dc_diff = 1;
-
- if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
- {
- xd->mode_info_context->mbmi.dc_diff = 0;
- skip_recon_mb(pbi, xd);
- return;
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs);
}
- if (xd->segmentation_enabled)
- mb_init_dequantizer(pbi, xd);
-
- de_quantand_idct(pbi, xd);
- reconstruct_mb(pbi, xd);
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ (xd->qcoeff+16*16, xd->block[16].dequant,
+ xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->eobs+16);
}
+
static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
{
int ret_val = 0;
@@ -293,18 +329,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
- int recon_y_stride = pc->last_frame.y_stride;
- int recon_uv_stride = pc->last_frame.uv_stride;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+ vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8;
- // reset above block coeffs
+ /* reset above block coeffs */
- xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
- xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
- xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+ xd->above_context = pc->above_context;
xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -312,10 +347,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
- // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
- vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
- if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
{
for (i = 0; i < 16; i++)
{
@@ -324,48 +357,38 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
}
}
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ /* Distance of Mb to the various image edges.
+ * These are specified in 1/8th pel units as they are always compared to values that are in 1/8th pel units
+ */
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
- // Select the appropriate reference frame for this MB
- if (xd->mbmi.ref_frame == LAST_FRAME)
- {
- xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
- }
- else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
- {
- // Golden frame reconstruction buffer
- xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
- }
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
else
- {
- // Alternate reference frame reconstruction buffer
- xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
- }
+ ref_fb_idx = pc->alt_fb_idx;
+
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
vp8_build_uvmvs(xd, pc->full_pixel);
/*
- if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0)
+ if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0)
pbi->debugoutput =1;
else
pbi->debugoutput =0;
*/
- vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd);
@@ -374,25 +397,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */
- xd->gf_active_ptr++; // GF useage flag for next MB
+ xd->above_context++;
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
-
- pbi->current_mb_col_main = mb_col;
}
- // adjust to the next row of mbs
+ /* adjust to the next row of mbs */
vp8_extend_mb_row(
- &pc->new_frame,
+ &pc->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
);
++xd->mode_info_context; /* skip prediction column */
-
- pbi->last_mb_row_decoded = mb_row;
}
@@ -432,7 +447,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
for (i = 0; i < num_part; i++)
{
const unsigned char *partition_size_ptr = cx_data + i * 3;
- unsigned int partition_size;
+ ptrdiff_t partition_size;
/* Calculate the length of this partition. The last partition
* size is implicit.
@@ -446,7 +461,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
partition_size = user_data_end - partition;
}
- if (partition + partition_size > user_data_end)
+ if (user_data_end - partition < partition_size)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition "
"%d length", i + 1);
@@ -473,18 +488,7 @@ static void stop_token_decoder(VP8D_COMP *pbi)
VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION)
- {
- int num_part = (1 << pc->multi_token_partition);
-
- for (i = 0; i < num_part; i++)
- {
- vp8dx_stop_decode(&pbi->mbc[i]);
- }
-
vpx_free(pbi->mbc);
- }
- else
- vp8dx_stop_decode(& pbi->bc2);
}
static void init_frame(VP8D_COMP *pbi)
@@ -494,7 +498,7 @@ static void init_frame(VP8D_COMP *pbi)
if (pc->frame_type == KEY_FRAME)
{
- // Various keyframe initializations
+ /* Various keyframe initializations */
vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
vp8_init_mbmode_probs(pc);
@@ -502,22 +506,23 @@ static void init_frame(VP8D_COMP *pbi)
vp8_default_coef_probs(pc);
vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
- // reset the segment feature data to 0 with delta coding (Default state).
+ /* reset the segment feature data to 0 with delta coding (Default state). */
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
- // reset the mode ref deltasa for loop filter
+ /* reset the mode and ref deltas for the loop filter */
vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
- // All buffers are implicitly updated on key frames.
+ /* All buffers are implicitly updated on key frames. */
pc->refresh_golden_frame = 1;
pc->refresh_alt_ref_frame = 1;
pc->copy_buffer_to_gf = 0;
pc->copy_buffer_to_arf = 0;
- // Note that Golden and Altref modes cannot be used on a key frame so
- // ref_frame_sign_bias[] is undefined and meaningless
+ /* Note that Golden and Altref modes cannot be used on a key frame so
+ * ref_frame_sign_bias[] is undefined and meaningless
+ */
pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
}
@@ -528,7 +533,7 @@ static void init_frame(VP8D_COMP *pbi)
else
pc->mcomp_filter_type = BILINEAR;
- // To enable choice of different interploation filters
+ /* To enable choice of different interpolation filters */
if (pc->mcomp_filter_type == SIXTAP)
{
xd->subpixel_predict = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4);
@@ -545,10 +550,10 @@ static void init_frame(VP8D_COMP *pbi)
}
}
- xd->left_context = pc->left_context;
+ xd->left_context = &pc->left_context;
xd->mode_info_context = pc->mi;
xd->frame_type = pc->frame_type;
- xd->mbmi.mode = DC_PRED;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_stride = pc->mode_info_stride;
}
@@ -559,12 +564,15 @@ int vp8_decode_frame(VP8D_COMP *pbi)
MACROBLOCKD *const xd = & pbi->mb;
const unsigned char *data = (const unsigned char *)pbi->Source;
const unsigned char *const data_end = data + pbi->source_sz;
- int first_partition_length_in_bytes;
+ ptrdiff_t first_partition_length_in_bytes;
int mb_row;
int i, j, k, l;
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
+ if (data_end - data < 3)
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet");
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
pc->version = (data[0] >> 1) & 7;
pc->show_frame = (data[0] >> 4) & 1;
@@ -572,7 +580,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
(data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
data += 3;
- if (data + first_partition_length_in_bytes > data_end)
+ if (data_end - data < first_partition_length_in_bytes)
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition 0 length");
vp8_setup_version(pc);
@@ -582,7 +590,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
const int Width = pc->Width;
const int Height = pc->Height;
- // vet via sync code
+ /* vet via sync code */
if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
@@ -595,6 +603,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (Width != pc->Width || Height != pc->Height)
{
+ int prev_mb_rows = pc->mb_rows;
+
if (pc->Width <= 0)
{
pc->Width = Width;
@@ -609,9 +619,14 @@ int vp8_decode_frame(VP8D_COMP *pbi)
"Invalid frame height");
}
- if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height))
+ if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
+
+#if CONFIG_MULTITHREAD
+ if (pbi->b_multithreaded_rd)
+ vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
+#endif
}
}
@@ -631,12 +646,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
}
- // Is segmentation enabled
+ /* Is segmentation enabled */
xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
if (xd->segmentation_enabled)
{
- // Signal whether or not the segmentation map is being explicitly updated this frame.
+ /* Signal whether or not the segmentation map is being explicitly updated this frame. */
xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc);
xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc);
@@ -646,12 +661,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
- // For each segmentation feature (Quant and loop filter level)
+ /* For each segmentation feature (Quant and loop filter level) */
for (i = 0; i < MB_LVL_MAX; i++)
{
for (j = 0; j < MAX_MB_SEGMENTS; j++)
{
- // Frame level data
+ /* Frame level data */
if (vp8_read_bit(bc))
{
xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]);
@@ -667,57 +682,57 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (xd->update_mb_segmentation_map)
{
- // Which macro block level features are enabled
+ /* Which macro block level features are enabled */
vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
- // Read the probs used to decode the segment id for each macro block.
+ /* Read the probs used to decode the segment id for each macro block. */
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
{
- // If not explicitly set value is defaulted to 255 by memset above
+ /* If not explicitly set value is defaulted to 255 by memset above */
if (vp8_read_bit(bc))
xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
}
}
}
- // Read the loop filter level and type
+ /* Read the loop filter level and type */
pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
pc->filter_level = vp8_read_literal(bc, 6);
pc->sharpness_level = vp8_read_literal(bc, 3);
- // Read in loop filter deltas applied at the MB level based on mode or ref frame.
+ /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
xd->mode_ref_lf_delta_update = 0;
xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc);
if (xd->mode_ref_lf_delta_enabled)
{
- // Do the deltas need to be updated
+ /* Do the deltas need to be updated */
xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc);
if (xd->mode_ref_lf_delta_update)
{
- // Send update
+ /* Send update */
for (i = 0; i < MAX_REF_LF_DELTAS; i++)
{
if (vp8_read_bit(bc))
{
- //sign = vp8_read_bit( bc );
+ /*sign = vp8_read_bit( bc );*/
xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
- if (vp8_read_bit(bc)) // Apply sign
+ if (vp8_read_bit(bc)) /* Apply sign */
xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
}
}
- // Send update
+ /* Send update */
for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
{
if (vp8_read_bit(bc))
{
- //sign = vp8_read_bit( bc );
+ /*sign = vp8_read_bit( bc );*/
xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
- if (vp8_read_bit(bc)) // Apply sign
+ if (vp8_read_bit(bc)) /* Apply sign */
xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
}
}
@@ -727,11 +742,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
setup_token_decoder(pbi, data + first_partition_length_in_bytes);
xd->current_bc = &pbi->bc2;
- // Read the default quantizers.
+ /* Read the default quantizers. */
{
int Q, q_update;
- Q = vp8_read_literal(bc, 7); // AC 1st order Q = default
+ Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */
pc->base_qindex = Q;
q_update = 0;
pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
@@ -743,20 +758,21 @@ int vp8_decode_frame(VP8D_COMP *pbi)
if (q_update)
vp8cx_init_de_quantizer(pbi);
- // MB level dequantizer setup
+ /* MB level dequantizer setup */
mb_init_dequantizer(pbi, &pbi->mb);
}
- // Determine if the golden frame or ARF buffer should be updated and how.
- // For all non key frames the GF and ARF refresh flags and sign bias
- // flags must be set explicitly.
+ /* Determine if the golden frame or ARF buffer should be updated and how.
+ * For all non key frames the GF and ARF refresh flags and sign bias
+ * flags must be set explicitly.
+ */
if (pc->frame_type != KEY_FRAME)
{
- // Should the GF or ARF be updated from the current frame
+ /* Should the GF or ARF be updated from the current frame */
pc->refresh_golden_frame = vp8_read_bit(bc);
pc->refresh_alt_ref_frame = vp8_read_bit(bc);
- // Buffer to buffer copy flags.
+ /* Buffer to buffer copy flags. */
pc->copy_buffer_to_gf = 0;
if (!pc->refresh_golden_frame)
@@ -793,9 +809,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
- vp8dx_bool_decoder_fill(bc);
{
- // read coef probability tree
+ /* read coef probability tree */
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
@@ -813,52 +828,49 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
}
- vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG));
- vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
- // set up frame new frame for intra coded blocks
- vp8_setup_intra_recon(&pc->new_frame);
+ /* set up the new frame for intra coded blocks */
+ if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
+ vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
vp8_setup_block_dptrs(xd);
vp8_build_block_doffsets(xd);
- // clear out the coeff buffer
+ /* clear out the coeff buffer */
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
- // Read the mb_no_coeff_skip flag
+ /* Read the mb_no_coeff_skip flag */
pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
- if (pc->frame_type == KEY_FRAME)
- vp8_kfread_modes(pbi);
- else
- vp8_decode_mode_mvs(pbi);
- // reset since these guys are used as iterators
- vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
- vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
- vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
- vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
-
- xd->gf_active_ptr = (signed char *)pc->gf_active_flags; // Point to base of GF active flags data structure
+ vp8_decode_mode_mvs(pbi);
+ vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
-
- if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
- vp8_start_lfthread(pbi);
-
- if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION)
+ if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
{
- vp8_mtdecode_mb_rows(pbi, xd);
+ vp8mt_decode_mb_rows(pbi, xd);
+ if(pbi->common.filter_level)
+ {
+ /*vp8_mt_loop_filter_frame(pbi);*/ /*cm, &pbi->mb, cm->filter_level);*/
+
+ pc->last_frame_type = pc->frame_type;
+ pc->last_filter_type = pc->filter_type;
+ pc->last_sharpness_level = pc->sharpness_level;
+ }
+ vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/
}
else
{
int ibc = 0;
- int num_part = 1 << pbi->common.multi_token_partition;
+ int num_part = 1 << pc->multi_token_partition;
- // Decode the individual macro block
+ /* Decode the individual macro block */
for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
{
@@ -873,20 +885,19 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vp8_decode_mb_row(pbi, pc, mb_row, xd);
}
-
- pbi->last_mb_row_decoded = mb_row;
}
stop_token_decoder(pbi);
- vp8dx_stop_decode(bc);
-
- // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos);
+ /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */
- // If this was a kf or Gf note the Q used
- if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame)
+ /* If this was a kf or Gf note the Q used */
+ if ((pc->frame_type == KEY_FRAME) ||
+ pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
+ {
pc->last_kf_gf_q = pc->base_qindex;
+ }
if (pc->refresh_entropy_probs == 0)
{
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
deleted file mode 100644
index fd05e6db5..000000000
--- a/vp8/decoder/demode.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-#include "entropymode.h"
-#include "findnearmv.h"
-
-
-int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
-
- return i;
-}
-
-
-int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
-
- return i;
-}
-
-int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
-
- return i;
-}
-
-
-
-int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
-{
- const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
-
- return i;
-}
-
-void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
-{
- // Is segmentation enabled
- if (x->segmentation_enabled && x->update_mb_segmentation_map)
- {
- // If so then read the segment id.
- if (vp8_read(r, x->mb_segment_tree_probs[0]))
- mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
- else
- mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
- }
-}
-
-void vp8_kfread_modes(VP8D_COMP *pbi)
-{
- VP8_COMMON *const cp = & pbi->common;
- vp8_reader *const bc = & pbi->bc;
-
- MODE_INFO *m = cp->mi;
- const int ms = cp->mode_info_stride;
-
- int mb_row = -1;
- vp8_prob prob_skip_false = 0;
-
- if (cp->mb_no_coeff_skip)
- prob_skip_false = (vp8_prob)(vp8_read_literal(bc, 8));
-
- while (++mb_row < cp->mb_rows)
- {
- int mb_col = -1;
-
- while (++mb_col < cp->mb_cols)
- {
- MB_PREDICTION_MODE y_mode;
-
- vp8dx_bool_decoder_fill(bc);
- // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
- // By default on a key frame reset all MBs to segment 0
- m->mbmi.segment_id = 0;
-
- if (pbi->mb.update_mb_segmentation_map)
- vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
-
- // Read the macroblock coeff skip flag if this feature is in use, else default to 0
- if (cp->mb_no_coeff_skip)
- m->mbmi.mb_skip_coeff = vp8_read(bc, prob_skip_false);
- else
- m->mbmi.mb_skip_coeff = 0;
-
- y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, cp->kf_ymode_prob);
-
- m->mbmi.ref_frame = INTRA_FRAME;
-
- if ((m->mbmi.mode = y_mode) == B_PRED)
- {
- int i = 0;
-
- do
- {
- const B_PREDICTION_MODE A = vp8_above_bmi(m, i, ms)->mode;
- const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
-
- m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, cp->kf_bmode_prob [A] [L]);
- }
- while (++i < 16);
- }
- else
- {
- int BMode;
- int i = 0;
-
- switch (y_mode)
- {
- case DC_PRED:
- BMode = B_DC_PRED;
- break;
- case V_PRED:
- BMode = B_VE_PRED;
- break;
- case H_PRED:
- BMode = B_HE_PRED;
- break;
- case TM_PRED:
- BMode = B_TM_PRED;
- break;
- default:
- BMode = B_DC_PRED;
- break;
- }
-
- do
- {
- m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
- }
- while (++i < 16);
- }
-
- (m++)->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, cp->kf_uv_mode_prob);
- }
-
- m++; // skip the border
- }
-}
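demode.c goes away in this merge; the keyframe branch in vp8_decode_frame() now calls vp8_decode_mode_mvs() unconditionally (see the decodframe.c hunk above), which absorbs this logic. The part worth noting in the deleted switch is the whole-MB to 4x4 submode mapping: every non-B_PRED luma mode has an equivalent per-block mode that all 16 subblocks inherit. A standalone sketch (enum values illustrative):

    typedef enum { MB_DC_PRED, MB_V_PRED, MB_H_PRED, MB_TM_PRED, MB_B_PRED } mb_mode;
    typedef enum { SB_DC_PRED, SB_VE_PRED, SB_HE_PRED, SB_TM_PRED } b_mode;

    /* Map a whole-macroblock intra mode to the 4x4 mode its subblocks
     * inherit; anything unexpected falls back to DC, as above. */
    static b_mode mb_to_b_mode(mb_mode m)
    {
        switch (m)
        {
        case MB_V_PRED:  return SB_VE_PRED;
        case MB_H_PRED:  return SB_HE_PRED;
        case MB_TM_PRED: return SB_TM_PRED;
        case MB_DC_PRED:
        default:         return SB_DC_PRED;
        }
    }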
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
deleted file mode 100644
index 51bbc5e7a..000000000
--- a/vp8/decoder/demode.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-
-/* Read (intra) modes for all blocks in a keyframe */
-
-void vp8_kfread_modes(VP8D_COMP *pbi);
-
-/* Intra mode for a Y subblock */
-
-int vp8_read_bmode(vp8_reader *, const vp8_prob *);
-
-/* MB intra Y mode trees differ for key and inter frames. */
-
-int vp8_read_ymode(vp8_reader *, const vp8_prob *);
-int vp8_kfread_ymode(vp8_reader *, const vp8_prob *);
-
-/* MB intra UV mode trees are the same for key and inter frames. */
-
-int vp8_read_uv_mode(vp8_reader *, const vp8_prob *);
-
-/* Read any macroblock-level features that may be present. */
-
-void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 14798d9af..84a9fd943 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,7 +24,7 @@ void vp8_dequantize_b_c(BLOCKD *d)
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = &d->dequant[0][0];
+ short *DQC = d->dequant;
for (i = 0; i < 16; i++)
{
@@ -31,8 +32,12 @@ void vp8_dequantize_b_c(BLOCKD *d)
}
}
-void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride)
{
+ short output[16];
+ short *diff_ptr = output;
+ int r, c;
int i;
for (i = 0; i < 16; i++)
@@ -40,13 +45,40 @@ void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
input[i] = dq[i] * input[i];
}
- vp8_short_idct4x4llm_c(input, output, pitch);
+ /* the idct halves ( >> 1) the pitch */
+ vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
vpx_memset(input, 0, 32);
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int a = diff_ptr[c] + pred[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dest[c] = (unsigned char) a;
+ }
+
+ dest += stride;
+ diff_ptr += 4;
+ pred += pitch;
+ }
}
-void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc)
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ int Dc)
{
int i;
+ short output[16];
+ short *diff_ptr = output;
+ int r, c;
input[0] = (short)Dc;
@@ -55,6 +87,28 @@ void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, in
input[i] = dq[i] * input[i];
}
- vp8_short_idct4x4llm_c(input, output, pitch);
+ /* the idct halves ( >> 1) the pitch */
+ vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
vpx_memset(input, 0, 32);
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int a = diff_ptr[c] + pred[c];
+
+ if (a < 0)
+ a = 0;
+
+ if (a > 255)
+ a = 255;
+
+ dest[c] = (unsigned char) a;
+ }
+
+ dest += stride;
+ diff_ptr += 4;
+ pred += pitch;
+ }
}
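Both rewritten functions fold dequantization, the 4x4 inverse transform, and a clamped add of the prediction into one pass, so the residual never round-trips through a separate reconstruction buffer. The saturating add is identical in the two; as a standalone helper it would look like this (clamp_add_4x4 is a sketch, not a function the patch adds):

    /* Add a 4x4 residual block (IDCT output) to the prediction and clip
     * the result to [0, 255], exactly as the r/c loops above do. */
    static void clamp_add_4x4(const short *diff, const unsigned char *pred,
                              unsigned char *dest, int pitch, int stride)
    {
        int r, c;
        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int a = diff[c] + pred[c];
                if (a < 0)   a = 0;
                if (a > 255) a = 255;
                dest[c] = (unsigned char)a;
            }
            dest += stride;
            diff += 4;
            pred += pitch;
        }
    }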
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index d16b02e58..b78e39c1d 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -15,11 +16,31 @@
#define prototype_dequant_block(sym) \
void sym(BLOCKD *x)
-#define prototype_dequant_idct(sym) \
- void sym(short *input, short *dq, short *output, int pitch)
+#define prototype_dequant_idct_add(sym) \
+ void sym(short *input, short *dq, \
+ unsigned char *pred, unsigned char *output, \
+ int pitch, int stride)
-#define prototype_dequant_idct_dc(sym) \
- void sym(short *input, short *dq, short *output, int pitch, int dc)
+#define prototype_dequant_dc_idct_add(sym) \
+ void sym(short *input, short *dq, \
+ unsigned char *pred, unsigned char *output, \
+ int pitch, int stride, \
+ int dc)
+
+#define prototype_dequant_dc_idct_add_y_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst, \
+ int stride, char *eobs, short *dc)
+
+#define prototype_dequant_idct_add_y_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst, \
+ int stride, char *eobs)
+
+#define prototype_dequant_idct_add_uv_block(sym) \
+ void sym(short *q, short *dq, \
+ unsigned char *pre, unsigned char *dst_u, \
+ unsigned char *dst_v, int stride, char *eobs)
#if ARCH_X86 || ARCH_X86_64
#include "x86/dequantize_x86.h"
@@ -34,25 +55,52 @@
#endif
extern prototype_dequant_block(vp8_dequant_block);
-#ifndef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_c
+#ifndef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_c
+#endif
+extern prototype_dequant_idct_add(vp8_dequant_idct_add);
+
+#ifndef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
+#endif
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
+
+#ifndef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_c
#endif
-extern prototype_dequant_idct(vp8_dequant_idct);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block);
-#ifndef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c
+#ifndef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
#endif
-extern prototype_dequant_idct_dc(vp8_dequant_idct_dc);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+#endif
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t));
-typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t));
+
+typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add_y_block((*vp8_dequant_dc_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
+
typedef struct
{
- vp8_dequant_block_fn_t block;
- vp8_dequant_idct_fn_t idct;
- vp8_dequant_idct_dc_fn_t idct_dc;
+ vp8_dequant_block_fn_t block;
+ vp8_dequant_idct_add_fn_t idct_add;
+ vp8_dequant_dc_idct_add_fn_t dc_idct_add;
+ vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
+ vp8_dequant_idct_add_y_block_fn_t idct_add_y_block;
+ vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
} vp8_dequant_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
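The vtable now carries whole-block entry points so a platform port can override the per-macroblock loops, not just single 4x4 transforms. Under CONFIG_RUNTIME_CPU_DETECT calls go through the table filled in at init time; otherwise the vp8_dequant_* names above collapse to direct calls to the _c versions. A sketch of the dispatch macro, following the INVOKE pattern libvpx uses for its other RTCD tables (the exact definition is not shown in this hunk):

    #if CONFIG_RUNTIME_CPU_DETECT
    #define DEQUANT_INVOKE(ctx, fn) (ctx)->fn
    #else
    #define DEQUANT_INVOKE(ctx, fn) vp8_dequant_##fn
    #endif

    /* hypothetical call site:
     * DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
     *     (xd->qcoeff, xd->block[0].dequant,
     *      xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs);
     */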
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index a42f18dd7..7d013d240 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,12 +14,12 @@
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
+#include "detokenize.h"
-#define BR_COUNT 8
#define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
@@ -43,49 +44,72 @@ typedef struct
DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
{
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
- { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
- { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
- { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
- { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
- { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
- { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
- { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
- { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
- { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
- { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
- { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ZERO_TOKEN */
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ONE_TOKEN */
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* TWO_TOKEN */
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* THREE_TOKEN */
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* FOUR_TOKEN */
+ { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY1 */
+ { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY2 */
+ { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY3 */
+ { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY4 */
+ { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY5 */
+ { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, /* DCT_VAL_CATEGORY6 */
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* EOB TOKEN */
};
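Each DCT_VAL_CATEGORYn row holds the category's base value, the index of its top extra bit, and per-bit probabilities; the decoded magnitude is the base plus extra bits read most-significant first. For example, DCT_VAL_CATEGORY3 has base 11 and top bit 2, so it spans values 11..18. A hedged sketch of the reconstruction (read_bit stands in for the boolean decoder):

    /* Rebuild a coefficient magnitude from its token: extra bits are
     * decoded MSB-first with probability probs[b] for bit b, matching the
     * DECODE_EXTRABIT_AND_ADJUST_VAL macro below. */
    static int token_value(int base, int top_bit, const unsigned char *probs,
                           int (*read_bit)(unsigned char prob))
    {
        int val = 0, b;
        for (b = top_bit; b >= 0; b--)
            if (read_bit(probs[b]))
                val += 1 << b;
        return base + val;
    }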
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
- int i;
-
- for (i = 0; i < 24; i++)
+ /* Clear entropy contexts for Y2 blocks */
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
-
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
- *a = *l = 0;
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ else
{
- a = A[Y2CONTEXT] + vp8_block2above[24];
- l = L[Y2CONTEXT] + vp8_block2left[24];
- *a = *l = 0;
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
}
+}
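Packing the above/left contexts into one ENTROPY_CONTEXT_PLANES struct turns the old 25-iteration pointer-chasing loop into at most two memsets. The sizeof()-1 form relies on the Y2 context being the struct's final byte: a B_PRED/SPLITMV macroblock has no Y2 block, so the previous Y2 context must survive the reset. A sketch with that layout assumed:

    #include <string.h>

    typedef char ENTROPY_CONTEXT;
    typedef struct              /* assumed layout; y2 must come last for
                                 * the sizeof()-1 trick above to work */
    {
        ENTROPY_CONTEXT y1[4];
        ENTROPY_CONTEXT u[2];
        ENTROPY_CONTEXT v[2];
        ENTROPY_CONTEXT y2[1];
    } ENTROPY_CONTEXT_PLANES;

    static void reset_contexts(ENTROPY_CONTEXT_PLANES *above,
                               ENTROPY_CONTEXT_PLANES *left, int has_y2)
    {
        size_t n = has_y2 ? sizeof(*above) : sizeof(*above) - 1;
        memset(above, 0, n);
        memset(left, 0, n);
    }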
+#if CONFIG_ARM_ASM_DETOK
+/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
+ * for the assembly version.
+ */
+DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
+{
+ /* vp8_block2left */
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+ /* vp8_block2above */
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
+void vp8_init_detokenizer(VP8D_COMP *dx)
+{
+ const VP8_COMMON *const oc = & dx->common;
+ MACROBLOCKD *x = & dx->mb;
+
+ dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
+ dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
+ dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
+ dx->detoken.scan = vp8_default_zig_zag1d;
+ dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
+ dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
+
+ dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
+ dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
+ dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
+ dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
}
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+#endif
+
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
+#define FILL \
+ if(count < 0) \
+ VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
#define NORMALIZE \
/*if(range < 0x80)*/ \
{ \
@@ -93,17 +117,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
range <<= shift; \
value <<= shift; \
count -= shift; \
- if(count <= 0) \
- { \
- count += BR_COUNT ; \
- value |= (*bufptr) << (BR_COUNT-count); \
- bufptr = br_ptr_advance(bufptr, 1); \
- } \
}
#define DECODE_AND_APPLYSIGN(value_to_sign) \
split = (range + 1) >> 1; \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
v= value_to_sign; \
@@ -111,28 +131,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
else \
{ \
range = range-split; \
- value = value-(split<<8); \
+ value = value-bigsplit; \
v = -value_to_sign; \
} \
range +=range; \
value +=value; \
- if (!--count) \
- { \
- count = BR_COUNT; \
- value |= *bufptr; \
- bufptr = br_ptr_advance(bufptr, 1); \
- }
+ count--;
#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) )>> 8); \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
goto branch; \
} \
- value -= (split<<8); \
+ value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@@ -140,7 +157,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) ) >> 8); \
- if ( (value >> 8) < split ) \
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
@@ -151,7 +170,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
goto branch; \
} goto BLOCK_FINISHED; /*for malformed input */\
} \
- value -= (split<<8); \
+ value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@@ -169,10 +188,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
- if(value >= (split<<8))\
+ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+ FILL \
+ if(value >= bigsplit)\
{\
range = range-split;\
- value = value-(split<<8);\
+ value = value-bigsplit;\
val += ((UINT16)1<<bits_count);\
}\
else\
@@ -181,14 +202,45 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
}\
NORMALIZE
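These macro changes switch the detokenizer from an 8-bit bool-decoder window, refilled one byte per NORMALIZE, to a register-wide value (VP8_BD_VALUE) with a signed bit count: FILL tops the register up only when `count` goes negative, so several symbols can be decoded per refill, and the split comparison happens against `bigsplit` placed in the top byte. A self-contained sketch of the same scheme (type and function names are assumptions, not the patch's exact definitions):

    #include <limits.h>
    #include <stddef.h>

    typedef size_t bd_value;                 /* register-wide, like VP8_BD_VALUE */
    #define BD_BITS ((int)(sizeof(bd_value) * CHAR_BIT))

    typedef struct
    {
        const unsigned char *buf, *end;
        bd_value value;                      /* next bits live in the high end */
        int count;                           /* valid bits beyond the top byte */
        unsigned int range;                  /* kept in [128, 255] */
    } bool_dec;

    static void fill(bool_dec *d)
    {
        int shift = BD_BITS - CHAR_BIT - (d->count + CHAR_BIT);
        while (shift >= 0 && d->buf < d->end)
        {
            d->value |= (bd_value)*d->buf++ << shift;
            d->count += CHAR_BIT;
            shift -= CHAR_BIT;
        }
    }

    static int decode_bool(bool_dec *d, unsigned char prob)
    {
        unsigned int split = 1 + ((prob * (d->range - 1)) >> 8);
        bd_value bigsplit = (bd_value)split << (BD_BITS - 8);
        int bit = 0;

        if (d->count < 0)                    /* lazy refill, cf. FILL */
            fill(d);

        if (d->value >= bigsplit)
        {
            d->range -= split;
            d->value -= bigsplit;
            bit = 1;
        }
        else
            d->range = split;

        while (d->range < 128)               /* renormalize, cf. NORMALIZE */
        {
            d->value <<= 1;
            d->range <<= 1;
            d->count--;
        }
        return bit;
    }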
+#if CONFIG_ARM_ASM_DETOK
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ int eobtotal = 0;
+ int i, type;
+
+ dx->detoken.current_bc = x->current_bc;
+ dx->detoken.A = x->above_context;
+ dx->detoken.L = x->left_context;
+
+ type = 3;
+
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ type = 1;
+ eobtotal -= 16;
+ }
+
+ vp8_decode_mb_tokens_v6(&dx->detoken, type);
+
+ for (i = 0; i < 25; i++)
+ {
+ x->eobs[i] = dx->detoken.eob[i];
+ eobtotal += dx->detoken.eob[i];
+ }
+
+ return eobtotal;
+}
+#else
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+ ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
+ ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
const VP8_COMMON *const oc = & dx->common;
BOOL_DECODER *bc = x->current_bc;
+ char *eobs = x->eobs;
+
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
int i;
@@ -198,11 +250,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
register int count;
const BOOL_DATA *bufptr;
+ const BOOL_DATA *bufend;
register unsigned int range;
- register unsigned int value;
+ VP8_BD_VALUE value;
const int *scan;
register unsigned int shift;
UINT32 split;
+ VP8_BD_VALUE bigsplit;
INT16 *qcoeff_ptr;
const vp8_prob *coef_probs;
@@ -210,46 +264,44 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
int stop;
INT16 val, bits_count;
INT16 c;
- INT16 t;
INT16 v;
const vp8_prob *Prob;
- //int *scan;
type = 3;
i = 0;
stop = 16;
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ scan = vp8_default_zig_zag1d;
+ qcoeff_ptr = &x->qcoeff[0];
+
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
i = 24;
stop = 24;
type = 1;
- qcoeff_ptr = &x->qcoeff[24*16];
- scan = vp8_default_zig_zag1d;
+ qcoeff_ptr += 24*16;
eobtotal -= 16;
}
- else
- {
- scan = vp8_default_zig_zag1d;
- qcoeff_ptr = &x->qcoeff[0];
- }
+ bufend = bc->user_buffer_end;
+ bufptr = bc->user_buffer;
+ value = bc->value;
count = bc->count;
range = bc->range;
- value = bc->value;
- bufptr = bc->read_ptr;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
BLOCK_LOOP:
- a = A[ vp8_block2context[i] ] + vp8_block2above[i];
- l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+ a = A + vp8_block2above[i];
+ l = L + vp8_block2left[i];
+
c = (INT16)(!type);
- VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+ /*Dest = ((A)!=0) + ((B)!=0);*/
+ VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
Prob = coef_probs;
- Prob += t * ENTROPY_NODES;
+ Prob += v * ENTROPY_NODES;
DO_WHILE:
Prob += vp8_coef_bands_x[c];
@@ -336,9 +388,8 @@ ONE_CONTEXT_NODE_0_:
qcoeff_ptr [ scan[15] ] = (INT16) v;
BLOCK_FINISHED:
- t = ((x->block[i].eob = c) != !type); // any nonzero data?
- eobtotal += x->block[i].eob;
- *a = *l = t;
+ *a = *l = ((eobs[i] = c) != !type); /* any nonzero data? */
+ eobtotal += c;
qcoeff_ptr += 16;
i++;
@@ -348,12 +399,11 @@ BLOCK_FINISHED:
if (i == 25)
{
- scan = vp8_default_zig_zag1d;//x->scan_order1d;
type = 0;
i = 0;
stop = 16;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
- qcoeff_ptr = &x->qcoeff[0];
+ qcoeff_ptr -= (24*16 + 16);
goto BLOCK_LOOP;
}
@@ -365,10 +415,12 @@ BLOCK_FINISHED:
goto BLOCK_LOOP;
}
- bc->count = count;
+ FILL
+ bc->user_buffer = bufptr;
bc->value = value;
+ bc->count = count;
bc->range = range;
- bc->read_ptr = bufptr;
return eobtotal;
}
+#endif /*!CONFIG_ARM_ASM_DETOK*/
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 6a9a47607..294a4a55d 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,19 +1,24 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef detokenize_h
-#define detokenize_h 1
+#ifndef DETOKENIZE_H
+#define DETOKENIZE_H
#include "onyxd_int.h"
+#if ARCH_ARM
+#include "arm/detokenize_arm.h"
+#endif
+
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
-#endif /* detokenize_h */
+#endif /* DETOKENIZE_H */
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 302b64bf8..2e284729b 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,19 +14,22 @@
#include "onyxd_int.h"
extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
- // Pure C:
+ /* Pure C: */
#if CONFIG_RUNTIME_CPU_DETECT
- pbi->mb.rtcd = &pbi->common.rtcd;
- pbi->dequant.block = vp8_dequantize_b_c;
- pbi->dequant.idct = vp8_dequant_idct_c;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.stop = vp8dx_stop_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
-#if 0 //For use with RTCD, when implemented
+ pbi->mb.rtcd = &pbi->common.rtcd;
+ pbi->dequant.block = vp8_dequantize_b_c;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_c;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
+ pbi->dboolhuff.start = vp8dx_start_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+#if 0 /*For use with RTCD, when implemented*/
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
#endif
@@ -34,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_decode_init(pbi);
#endif
+
+#if ARCH_ARM
+ vp8_arch_arm_decode_init(pbi);
+#endif
}
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
new file mode 100644
index 000000000..c98bd5bb8
--- /dev/null
+++ b/vp8/decoder/idct_blk.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ int Dc);
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride);
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int pitch, int stride);
+
+void vp8_dequant_dc_idct_add_y_block_c
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i, j;
+
+ for (i = 0; i < 4; i++)
+ {
+ for (j = 0; j < 4; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_c (dc[0], pre, dst, 16, stride);
+
+ q += 16;
+ pre += 4;
+ dst += 4;
+ dc ++;
+ }
+
+ pre += 64 - 16;
+ dst += 4*stride - 16;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_c
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i, j;
+
+ for (i = 0; i < 4; i++)
+ {
+ for (j = 0; j < 4; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ pre += 4;
+ dst += 4;
+ }
+
+ pre += 64 - 16;
+ dst += 4*stride - 16;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_c
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i, j;
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ pre += 4;
+ dstu += 4;
+ }
+
+ pre += 32 - 8;
+ dstu += 4*stride - 8;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < 2; j++)
+ {
+ if (*eobs++ > 1)
+ vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ q += 16;
+ pre += 4;
+ dstv += 4;
+ }
+
+ pre += 32 - 8;
+ dstv += 4*stride - 8;
+ }
+}
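Every loop in this new file branches on the end-of-block count: eobs[i] > 1 means real AC coefficients and the full dequant+IDCT path, while eobs[i] <= 1 means at most a DC term, so the inverse transform degenerates to adding one constant to all 16 pixels, and `((int *)q)[0] = 0` clears q[0] and q[1] in a single store. The dc-only helper is then just the constant add with the IDCT's final rounding; this mirrors vp8_dc_only_idct_add_c:

    static void dc_only_idct_add(short input_dc, const unsigned char *pred,
                                 unsigned char *dst, int pitch, int stride)
    {
        int a = (input_dc + 4) >> 3;    /* same rounding as the full 4x4 IDCT */
        int r, c;

        for (r = 0; r < 4; r++)
        {
            for (c = 0; c < 4; c++)
            {
                int v = a + pred[c];
                if (v < 0)   v = 0;
                if (v > 255) v = 255;
                dst[c] = (unsigned char)v;
            }
            pred += pitch;
            dst += stride;
        }
    }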
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 6875585f0..6eda45e4a 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,18 +24,19 @@
#include "threading.h"
#include "decoderthreading.h"
#include <stdio.h>
-#include "segmentation_common.h"
+
#include "quant_common.h"
#include "vpx_scale/vpxscale.h"
#include "systemdependent.h"
#include "vpx_ports/vpx_timer.h"
-
+#include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-// DEBUG code
#if CONFIG_DEBUG
void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
{
@@ -111,12 +113,13 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
pbi->common.current_video_frame = 0;
pbi->ready_for_new_data = 1;
- pbi->CPUFreq = 0; //vp8_get_processor_freq();
+ pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
pbi->max_threads = oxcf->max_threads;
vp8_decoder_create_threads(pbi);
- //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
- // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+ * unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ */
vp8cx_init_de_quantizer(pbi);
{
@@ -128,6 +131,9 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
cm->last_sharpness_level = cm->sharpness_level;
}
+#if CONFIG_ARM_ASM_DETOK
+ vp8_init_detokenizer(pbi);
+#endif
pbi->common.error.setjmp = 0;
return (VP8D_PTR) pbi;
}
@@ -140,6 +146,10 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
if (!pbi)
return;
+#if CONFIG_MULTITHREAD
+ if (pbi->b_multithreaded_rd)
+ vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
+#endif
vp8_decoder_remove_threads(pbi);
vp8_remove_common(&pbi->common);
vpx_free(pbi);
@@ -179,57 +189,143 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
{
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
+ int ref_fb_idx;
if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
return 0;
}
int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
{
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
+ int ref_fb_idx;
if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
return 0;
}
-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
#endif
+
+static int get_free_fb (VP8_COMMON *cm)
+{
+ int i;
+ for (i = 0; i < NUM_YV12_BUFFERS; i++)
+ if (cm->fb_idx_ref_cnt[i] == 0)
+ break;
+
+ cm->fb_idx_ref_cnt[i] = 1;
+ return i;
+}
+
+static void ref_cnt_fb (int *buf, int *idx, int new_idx)
+{
+ if (buf[*idx] > 0)
+ buf[*idx]--;
+
+ *idx = new_idx;
+
+ buf[new_idx]++;
+}
+
+/* If any buffer copy / swapping is signalled it should be done here. */
+static int swap_frame_buffers (VP8_COMMON *cm)
+{
+ int fb_to_update_with, err = 0;
+
+ if (cm->refresh_last_frame)
+ fb_to_update_with = cm->lst_fb_idx;
+ else
+ fb_to_update_with = cm->new_fb_idx;
+
+ /* The alternate reference frame or golden frame can be updated
+ * using the new, last, or golden/alt ref frame. If it
+ * is updated using the newly decoded frame it is a refresh.
+ * An update using the last or golden/alt ref frame is a copy.
+ */
+ if (cm->copy_buffer_to_arf)
+ {
+ int new_fb = 0;
+
+ if (cm->copy_buffer_to_arf == 1)
+ new_fb = fb_to_update_with;
+ else if (cm->copy_buffer_to_arf == 2)
+ new_fb = cm->gld_fb_idx;
+ else
+ err = -1;
+
+ ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+ }
+
+ if (cm->copy_buffer_to_gf)
+ {
+ int new_fb = 0;
+
+ if (cm->copy_buffer_to_gf == 1)
+ new_fb = fb_to_update_with;
+ else if (cm->copy_buffer_to_gf == 2)
+ new_fb = cm->alt_fb_idx;
+ else
+ err = -1;
+
+ ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
+ }
+
+ if (cm->refresh_golden_frame)
+ ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
+
+ if (cm->refresh_alt_ref_frame)
+ ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
+
+ if (cm->refresh_last_frame)
+ {
+ ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
+
+ cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
+ }
+ else
+ cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+
+ return err;
+}
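The whole-frame vp8_yv12_copy_frame_ptr() calls that used to implement golden/altref refreshes are gone: yv12_fb[] is now a pool, fb_idx_ref_cnt[] counts how many reference roles share each slot, and a refresh or copy is just ref_cnt_fb() retargeting an index. A standalone toy model of the pool (buffer contents omitted; sizes and the sequence of roles are hypothetical):

    #include <stdio.h>

    #define NUM_FB 4
    static int ref_cnt[NUM_FB];          /* cf. fb_idx_ref_cnt */

    static int get_free(void)            /* cf. get_free_fb */
    {
        int i;
        for (i = 0; i < NUM_FB; i++)
            if (ref_cnt[i] == 0)
            {
                ref_cnt[i] = 1;
                return i;
            }
        return -1;                       /* the real pool is sized so this can't happen */
    }

    static void retarget(int *idx, int new_idx)   /* cf. ref_cnt_fb */
    {
        if (ref_cnt[*idx] > 0)
            ref_cnt[*idx]--;
        *idx = new_idx;
        ref_cnt[new_idx]++;
    }

    int main(void)
    {
        int lst = get_free(), gld = get_free(), alt = get_free();
        int new_fb = get_free();         /* decode target for this frame */

        retarget(&gld, new_fb);          /* "refresh golden": share, don't copy */
        retarget(&lst, new_fb);          /* "refresh last" */
        ref_cnt[new_fb]--;               /* decode role released after the swap */

        printf("lst=%d gld=%d alt=%d shared refs=%d\n",
               lst, gld, alt, ref_cnt[new_fb]);
        return 0;
    }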
+
int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
{
+#if HAVE_ARMV7
+ INT64 dx_store_reg[8];
+#endif
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
int retcode = 0;
-
struct vpx_usec_timer timer;
-// if(pbi->ready_for_new_data == 0)
-// return -1;
+ /*if(pbi->ready_for_new_data == 0)
+ return -1;*/
if (ptr == 0)
{
@@ -238,21 +334,38 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.error_code = VPX_CODEC_OK;
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(dx_store_reg);
+ }
+#endif
+
+ cm->new_fb_idx = get_free_fb (cm);
+
if (setjmp(pbi->common.error.jmp))
{
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return -1;
}
pbi->common.error.setjmp = 1;
-#if HAVE_ARMV7
- vp8_push_neon(dx_store_reg);
-#endif
-
vpx_usec_timer_start(&timer);
- //cm->current_video_frame++;
+ /*cm->current_video_frame++;*/
pbi->Source = source;
pbi->source_sz = size;
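Decode errors unwind via longjmp to pbi->common.error.jmp, so everything acquired before the jump point now has a matching release inside the setjmp handler as well as on each early return: the NEON callee-saved registers d8-d15 pushed at entry are popped, and the reference taken on new_fb_idx is dropped (dx_store_reg also moves from a static to the stack so concurrent decoder instances cannot clobber each other's saved registers). A minimal model of the acquire/handler/release shape, with a refcount standing in for both resources:

    #include <setjmp.h>

    static jmp_buf err_jmp;              /* cf. pbi->common.error.jmp */
    static int refcnt;

    static void fail_somewhere_deep(void)
    {
        longjmp(err_jmp, 1);             /* cf. vpx_internal_error() */
    }

    int decode_one(int fail)
    {
        refcnt++;                        /* acquire, cf. get_free_fb() */
        if (setjmp(err_jmp))
        {
            refcnt--;                    /* release on the error path too */
            return -1;
        }
        if (fail)
            fail_somewhere_deep();
        refcnt--;                        /* normal release (swap/refresh) */
        return 0;
    }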
@@ -261,101 +374,78 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
if (retcode < 0)
{
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.error_code = VPX_CODEC_ERROR;
pbi->common.error.setjmp = 0;
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
return retcode;
}
- // Update the GF useage maps.
- vp8_update_gf_useage_maps(cm, &pbi->mb);
-
- if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
- vp8_stop_lfthread(pbi);
-
- if (cm->refresh_last_frame)
- {
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
- cm->frame_to_show = &cm->last_frame;
- }
- else
+ if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
{
- cm->frame_to_show = &cm->new_frame;
- }
-
- if (!pbi->b_multithreaded_lf)
+ if (swap_frame_buffers (cm))
+ {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ return -1;
+ }
+ } else
{
- struct vpx_usec_timer lpftimer;
- vpx_usec_timer_start(&lpftimer);
- // Apply the loop filter if appropriate.
+ if (swap_frame_buffers (cm))
+ {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
+#endif
+ pbi->common.error.error_code = VPX_CODEC_ERROR;
+ pbi->common.error.setjmp = 0;
+ return -1;
+ }
- if (cm->filter_level > 0)
+ if(pbi->common.filter_level)
{
+ struct vpx_usec_timer lpftimer;
+ vpx_usec_timer_start(&lpftimer);
+ /* Apply the loop filter if appropriate. */
+
vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+ vpx_usec_timer_mark(&lpftimer);
+ pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
-
}
-
- vpx_usec_timer_mark(&lpftimer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+ vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
}
- vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
#if 0
- // DEBUG code
- //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+ /* DEBUG code */
+ /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
if (cm->current_video_frame <= 5)
write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
#endif
- // If any buffer copy / swaping is signalled it should be done here.
- if (cm->copy_buffer_to_arf)
- {
- if (cm->copy_buffer_to_arf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
- }
- else if (cm->copy_buffer_to_arf == 2)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
- }
-
- if (cm->copy_buffer_to_gf)
- {
- if (cm->copy_buffer_to_gf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
- }
- else if (cm->copy_buffer_to_gf == 2)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
- }
-
- // Should the golden or alternate reference frame be refreshed?
- if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
- {
- if (cm->refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
- if (cm->refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
- //vpx_log("Decoder: recovery frame received \n");
-
- // Update data structures that monitors GF useage
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
- }
-
vp8_clear_system_state();
vpx_usec_timer_mark(&timer);
@@ -363,7 +453,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->time_decoding += pbi->decode_microseconds;
-// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+ /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
if (cm->show_frame)
cm->current_video_frame++;
@@ -406,7 +496,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
#endif
#if HAVE_ARMV7
- vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(dx_store_reg);
+ }
#endif
pbi->common.error.setjmp = 0;
return retcode;
@@ -419,7 +514,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
if (pbi->ready_for_new_data == 1)
return ret;
- // ie no raw frame to show!!!
+ /* ie no raw frame to show!!! */
if (pbi->common.show_frame == 0)
return ret;
@@ -445,7 +540,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
ret = -1;
}
-#endif //!CONFIG_POSTPROC
+#endif /*!CONFIG_POSTPROC*/
vp8_clear_system_state();
return ret;
}
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
deleted file mode 100644
index 363ad5d72..000000000
--- a/vp8/decoder/onyxd_if_sjl.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "onyxc_int.h"
-#include "postproc.h"
-#include "onyxd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "alloccommon.h"
-#include "vpx_scale/yv12extend.h"
-#include "loopfilter.h"
-#include "swapyv12buffer.h"
-#include "g_common.h"
-#include "threading.h"
-#include "decoderthreading.h"
-#include <stdio.h>
-#include "segmentation_common.h"
-#include "quant_common.h"
-#include "vpx_scale/vpxscale.h"
-#include "systemdependent.h"
-#include "vpx_ports/vpx_timer.h"
-
-
-#ifndef VPX_NO_GLOBALS
-static int init_ct = 0;
-#else
-# include "vpx_global_handling.h"
-# define init_ct ((int)vpxglobalm(onyxd,init_ct))
-#endif
-
-extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
-extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-extern void init_detokenizer(VP8D_COMP *dx);
-
-// DEBUG code
-void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
-{
- FILE *yuv_file = fopen((char *)name, "ab");
- unsigned char *src = s->y_buffer;
- int h = s->y_height;
-
- do
- {
- fwrite(src, s->y_width, 1, yuv_file);
- src += s->y_stride;
- }
- while (--h);
-
- src = s->u_buffer;
- h = s->uv_height;
-
- do
- {
- fwrite(src, s->uv_width, 1, yuv_file);
- src += s->uv_stride;
- }
- while (--h);
-
- src = s->v_buffer;
- h = s->uv_height;
-
- do
- {
- fwrite(src, s->uv_width, 1, yuv_file);
- src += s->uv_stride;
- }
- while (--h);
-
- fclose(yuv_file);
-}
-
-void vp8dx_initialize()
-{
- if (!init_ct++)
- {
- vp8_initialize_common();
- vp8_scale_machine_specific_config();
- }
-}
-
-void vp8dx_shutdown()
-{
- if (!--init_ct)
- {
- vp8_shutdown_common();
- }
-}
-
-
-VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
-{
- VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
-
- if (!pbi)
- return NULL;
-
- vpx_memset(pbi, 0, sizeof(VP8D_COMP));
-
- vp8dx_initialize();
-
- vp8_create_common(&pbi->common);
- vp8_dmachine_specific_config(pbi);
-
- pbi->common.current_video_frame = 0;
- pbi->ready_for_new_data = 1;
-
- pbi->CPUFreq = 0; //vp8_get_processor_freq();
- pbi->max_threads = oxcf->max_threads;
- vp8_decoder_create_threads(pbi);
-
- //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
- // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
- vp8cx_init_de_quantizer(pbi);
-
- {
- VP8_COMMON *cm = &pbi->common;
-
- vp8_init_loop_filter(cm);
- cm->last_frame_type = KEY_FRAME;
- cm->last_filter_type = cm->filter_type;
- cm->last_sharpness_level = cm->sharpness_level;
- }
-
- init_detokenizer(pbi);
-
- return (VP8D_PTR) pbi;
-}
-void vp8dx_remove_decompressor(VP8D_PTR ptr)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
- if (!pbi)
- return;
-
- vp8_decoder_remove_threads(pbi);
- vp8_remove_common(&pbi->common);
- vpx_free(pbi);
- vp8dx_shutdown();
-
-}
-
-void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
- (void) pbi;
- (void) x;
-
- switch (oxst)
- {
- case VP8D_OK:
- break;
- }
-}
-
-int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
- (void) pbi;
-
- switch (oxst)
- {
- case VP8D_OK:
- break;
- }
-
- return -1;
-}
-
-int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
-
- if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
- else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
- else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
- else
- return -1;
-
- return 0;
-}
-int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
-
- if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
- else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
- else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
- else
- return -1;
-
- return 0;
-}
-int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp)
-{
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
- VP8_COMMON *cm = &pbi->common;
- int retcode = 0;
-
- struct vpx_usec_timer timer;
- (void) size;
-
-// if(pbi->ready_for_new_data == 0)
-// return -1;
-
- vpx_usec_timer_start(&timer);
-
- if (ptr == 0)
- {
- return -1;
- }
-
- //cm->current_video_frame++;
- pbi->Source = source;
-
- retcode = vp8_decode_frame(pbi);
-
- if (retcode < 0)
- return retcode;
-
- // Update the GF useage maps.
- vp8_update_gf_useage_maps(cm, &pbi->mb);
-
- if (pbi->b_multithreaded)
- vp8_stop_lfthread(pbi);
-
- if (cm->refresh_last_frame)
- {
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
- cm->frame_to_show = &cm->last_frame;
- }
- else
- {
- cm->frame_to_show = &cm->new_frame;
- }
-
- if (!pbi->b_multithreaded)
- {
- struct vpx_usec_timer lpftimer;
- vpx_usec_timer_start(&lpftimer);
- // Apply the loop filter if appropriate.
-
- if (cm->filter_level > 0)
- {
- vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
- cm->last_frame_type = cm->frame_type;
- cm->last_filter_type = cm->filter_type;
- cm->last_sharpness_level = cm->sharpness_level;
-
- }
-
- vpx_usec_timer_mark(&lpftimer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
- }
-
- vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
-#if 0
- // DEBUG code
- //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
- if (cm->current_video_frame <= 5)
- write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
-#endif
-
- // If any buffer copy / swaping is signalled it should be done here.
- if (cm->copy_buffer_to_arf)
- {
- if (cm->copy_buffer_to_arf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
- }
- else if (cm->copy_buffer_to_arf == 2)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
- }
-
- if (cm->copy_buffer_to_gf)
- {
- if (cm->copy_buffer_to_gf == 1)
- {
- if (cm->refresh_last_frame)
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
- }
- else if (cm->copy_buffer_to_gf == 2)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
- }
-
- // Should the golden or alternate reference frame be refreshed?
- if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
- {
- if (cm->refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
- if (cm->refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
- //vpx_log("Decoder: recovery frame received \n");
-
- // Update data structures that monitors GF useage
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
- }
-
- vp8_clear_system_state();
-
- vpx_usec_timer_mark(&timer);
- pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
- pbi->time_decoding += pbi->decode_microseconds;
-
-// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
-
- cm->current_video_frame++;
- pbi->ready_for_new_data = 0;
- pbi->last_time_stamp = time_stamp;
-
- {
- int i;
- INT64 earliest_time = pbi->dr[0].time_stamp;
- INT64 latest_time = pbi->dr[0].time_stamp;
- INT64 time_diff = 0;
- int bytes = 0;
-
- pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
- pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
-
- for (i = 0; i < 16; i++)
- {
-
- bytes += pbi->dr[i].size;
-
- if (pbi->dr[i].time_stamp < earliest_time)
- earliest_time = pbi->dr[i].time_stamp;
-
- if (pbi->dr[i].time_stamp > latest_time)
- latest_time = pbi->dr[i].time_stamp;
- }
-
- time_diff = latest_time - earliest_time;
-
- if (time_diff > 0)
- {
- pbi->common.bitrate = 80000.00 * bytes / time_diff ;
- pbi->common.framerate = 160000000.00 / time_diff ;
- }
-
- }
- return retcode;
-}
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
-{
- int ret = -1;
- VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
- if (pbi->ready_for_new_data == 1)
- return ret;
-
- // ie no raw frame to show!!!
- if (pbi->common.show_frame == 0)
- return ret;
-
- pbi->ready_for_new_data = 1;
- *time_stamp = pbi->last_time_stamp;
- *time_end_stamp = 0;
-
- sd->clrtype = pbi->common.clr_type;
- ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
- vp8_clear_system_state();
- return ret;
-}
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index fa4fa48e4..7593edf27 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -47,21 +48,20 @@ typedef struct
typedef struct
{
- int *scan;
- UINT8 *ptr_onyxblock2context_leftabove;
- vp8_tree_index *vp8_coef_tree_ptr; //onyx_coef_tree_ptr; ???
- TOKENEXTRABITS *teb_base_ptr;
+ int const *scan;
+ UINT8 const *ptr_block2leftabove;
+ vp8_tree_index const *vp8_coef_tree_ptr;
+ TOKENEXTRABITS const *teb_base_ptr;
unsigned char *norm_ptr;
-// UINT16 *ptr_onyx_coef_bands_x;
- UINT8 *ptr_onyx_coef_bands_x;
+ UINT8 *ptr_coef_bands_x;
- ENTROPY_CONTEXT **A;
- ENTROPY_CONTEXT(*L)[4];
+ ENTROPY_CONTEXT_PLANES *A;
+ ENTROPY_CONTEXT_PLANES *L;
INT16 *qcoeff_start_ptr;
BOOL_DECODER *current_bc;
- UINT8 *coef_probs[4];
+ vp8_prob const *coef_probs[4];
UINT8 eob[25];
@@ -88,28 +88,33 @@ typedef struct VP8Decompressor
unsigned int time_loop_filtering;
volatile int b_multithreaded_rd;
- volatile int b_multithreaded_lf;
int max_threads;
- int last_mb_row_decoded;
int current_mb_col_main;
int decoding_thread_count;
int allocated_decoding_thread_count;
- // variable for threading
- DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
+ /* variable for threading */
#if CONFIG_MULTITHREAD
- pthread_t h_thread_lpf; // thread for postprocessing
- sem_t h_event_lpf; // Event for post_proc completed
- sem_t h_event_start_lpf;
-#endif
+ int mt_baseline_filter_level[MAX_MB_SEGMENTS];
+ int sync_range;
+ int *mt_current_mb_col; /* Each row remembers its already decoded column. */
+
+ unsigned char **mt_yabove_row; /* mb_rows x width */
+ unsigned char **mt_uabove_row;
+ unsigned char **mt_vabove_row;
+ unsigned char **mt_yleft_col; /* mb_rows x 16 */
+ unsigned char **mt_uleft_col; /* mb_rows x 8 */
+ unsigned char **mt_vleft_col; /* mb_rows x 8 */
+
MB_ROW_DEC *mb_row_di;
- DECODETHREAD_DATA *de_thread_data;
-#if CONFIG_MULTITHREAD
+ DECODETHREAD_DATA *de_thread_data;
+
pthread_t *h_decoding_thread;
- sem_t *h_event_mbrdecoding;
- sem_t h_event_main;
- // end of threading data
+ sem_t *h_event_start_decoding;
+ sem_t h_event_end_decoding;
+ /* end of threading data */
#endif
+
vp8_reader *mbc;
INT64 last_time_stamp;
int ready_for_new_data;
@@ -123,6 +128,12 @@ typedef struct VP8Decompressor
struct vp8_dboolhuff_rtcd_vtable dboolhuff;
#endif
+
+ vp8_prob prob_intra;
+ vp8_prob prob_last;
+ vp8_prob prob_gf;
+ vp8_prob prob_skip_false;
+
} VP8D_COMP;
int vp8_decode_frame(VP8D_COMP *cpi);
diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
new file mode 100644
index 000000000..ad4324b27
--- /dev/null
+++ b/vp8/decoder/reconintra_mt.c
@@ -0,0 +1,982 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "recon.h"
+#include "reconintra.h"
+#include "vpx_mem/vpx_mem.h"
+#include "onyxd_int.h"
+
+/* Multi-threaded versions of the reconintra functions. The _s variants,
+ * vp8mt_build_intra_predictors_mby_s() and vp8mt_build_intra_predictors_mbuv_s(),
+ * predict directly into the destination frame for use by skip_recon_mb().
+ */
+
+void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+ unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */
+ unsigned char *yleft_col;
+ unsigned char yleft_buf[16];
+ unsigned char ytop_left; /* = yabove_row[-1]; */
+ unsigned char *ypred_ptr = x->predictor;
+ int r, c, i;
+
+ if (pbi->common.filter_level)
+ {
+ yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32;
+ yleft_col = pbi->mt_yleft_col[mb_row];
+ } else
+ {
+ yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+ for (i = 0; i < 16; i++)
+ yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
+ yleft_col = yleft_buf;
+ }
+
+ ytop_left = yabove_row[-1];
+
+ /* for Y */
+ switch (x->mode_info_context->mbmi.mode)
+ {
+ case DC_PRED:
+ {
+ int expected_dc;
+ int i;
+ int shift;
+ int average = 0;
+
+
+ if (x->up_available || x->left_available)
+ {
+ if (x->up_available)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ average += yabove_row[i];
+ }
+ }
+
+ if (x->left_available)
+ {
+
+ for (i = 0; i < 16; i++)
+ {
+ average += yleft_col[i];
+ }
+
+ }
+
+
+
+ shift = 3 + x->up_available + x->left_available;
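+            /* shift is 4 or 5: a rounded average of the 16 or 32 available boundary samples. */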
+ expected_dc = (average + (1 << (shift - 1))) >> shift;
+ }
+ else
+ {
+ expected_dc = 128;
+ }
+
+ vpx_memset(ypred_ptr, expected_dc, 256);
+ }
+ break;
+ case V_PRED:
+ {
+
+ for (r = 0; r < 16; r++)
+ {
+
+ ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
+ ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
+ ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
+ ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
+ ypred_ptr += 16;
+ }
+ }
+ break;
+ case H_PRED:
+ {
+
+ for (r = 0; r < 16; r++)
+ {
+
+ vpx_memset(ypred_ptr, yleft_col[r], 16);
+ ypred_ptr += 16;
+ }
+
+ }
+ break;
+ case TM_PRED:
+ {
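+        /* True-motion prediction: each pixel is left + above - top_left, clamped to [0, 255]. */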
+
+ for (r = 0; r < 16; r++)
+ {
+ for (c = 0; c < 16; c++)
+ {
+ int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
+
+ if (pred < 0)
+ pred = 0;
+
+ if (pred > 255)
+ pred = 255;
+
+ ypred_ptr[c] = pred;
+ }
+
+ ypred_ptr += 16;
+ }
+
+ }
+ break;
+ case B_PRED:
+ case NEARESTMV:
+ case NEARMV:
+ case ZEROMV:
+ case NEWMV:
+ case SPLITMV:
+ case MB_MODE_COUNT:
+ break;
+ }
+#else
+ (void) pbi;
+ (void) x;
+ (void) mb_row;
+ (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+ unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */
+ unsigned char *yleft_col;
+ unsigned char yleft_buf[16];
+ unsigned char ytop_left; /* = yabove_row[-1]; */
+ unsigned char *ypred_ptr = x->predictor;
+ int r, c, i;
+
+ int y_stride = x->dst.y_stride;
+ ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
+
+ if (pbi->common.filter_level)
+ {
+ yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32;
+ yleft_col = pbi->mt_yleft_col[mb_row];
+ } else
+ {
+ yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+ for (i = 0; i < 16; i++)
+ yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
+ yleft_col = yleft_buf;
+ }
+
+ ytop_left = yabove_row[-1];
+
+ /* for Y */
+ switch (x->mode_info_context->mbmi.mode)
+ {
+ case DC_PRED:
+ {
+ int expected_dc;
+ int i;
+ int shift;
+ int average = 0;
+
+
+ if (x->up_available || x->left_available)
+ {
+ if (x->up_available)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ average += yabove_row[i];
+ }
+ }
+
+ if (x->left_available)
+ {
+
+ for (i = 0; i < 16; i++)
+ {
+ average += yleft_col[i];
+ }
+
+ }
+
+
+
+ shift = 3 + x->up_available + x->left_available;
+ expected_dc = (average + (1 << (shift - 1))) >> shift;
+ }
+ else
+ {
+ expected_dc = 128;
+ }
+
+ /*vpx_memset(ypred_ptr, expected_dc, 256);*/
+ for (r = 0; r < 16; r++)
+ {
+ vpx_memset(ypred_ptr, expected_dc, 16);
+ ypred_ptr += y_stride; /*16;*/
+ }
+ }
+ break;
+ case V_PRED:
+ {
+
+ for (r = 0; r < 16; r++)
+ {
+
+ ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
+ ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
+ ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
+ ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
+ ypred_ptr += y_stride; /*16;*/
+ }
+ }
+ break;
+ case H_PRED:
+ {
+
+ for (r = 0; r < 16; r++)
+ {
+
+ vpx_memset(ypred_ptr, yleft_col[r], 16);
+ ypred_ptr += y_stride; /*16;*/
+ }
+
+ }
+ break;
+ case TM_PRED:
+ {
+
+ for (r = 0; r < 16; r++)
+ {
+ for (c = 0; c < 16; c++)
+ {
+ int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
+
+ if (pred < 0)
+ pred = 0;
+
+ if (pred > 255)
+ pred = 255;
+
+ ypred_ptr[c] = pred;
+ }
+
+ ypred_ptr += y_stride; /*16;*/
+ }
+
+ }
+ break;
+ case B_PRED:
+ case NEARESTMV:
+ case NEARMV:
+ case ZEROMV:
+ case NEWMV:
+ case SPLITMV:
+ case MB_MODE_COUNT:
+ break;
+ }
+#else
+ (void) pbi;
+ (void) x;
+ (void) mb_row;
+ (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+ unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */
+ unsigned char *uleft_col; /*[16];*/
+ unsigned char uleft_buf[8];
+ unsigned char utop_left; /* = uabove_row[-1]; */
+ unsigned char *vabove_row; /* = x->dst.v_buffer - x->dst.uv_stride; */
+ unsigned char *vleft_col; /*[20];*/
+ unsigned char vleft_buf[8];
+ unsigned char vtop_left; /* = vabove_row[-1]; */
+ unsigned char *upred_ptr = &x->predictor[256];
+ unsigned char *vpred_ptr = &x->predictor[320];
+ int i, j;
+
+ if (pbi->common.filter_level)
+ {
+ uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16;
+ vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16;
+ uleft_col = pbi->mt_uleft_col[mb_row];
+ vleft_col = pbi->mt_vleft_col[mb_row];
+ } else
+ {
+ uabove_row = x->dst.u_buffer - x->dst.uv_stride;
+ vabove_row = x->dst.v_buffer - x->dst.uv_stride;
+
+ for (i = 0; i < 8; i++)
+ {
+ uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
+ vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
+ }
+ uleft_col = uleft_buf;
+ vleft_col = vleft_buf;
+ }
+ utop_left = uabove_row[-1];
+ vtop_left = vabove_row[-1];
+
+ switch (x->mode_info_context->mbmi.uv_mode)
+ {
+ case DC_PRED:
+ {
+ int expected_udc;
+ int expected_vdc;
+ int i;
+ int shift;
+ int Uaverage = 0;
+ int Vaverage = 0;
+
+ if (x->up_available)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ Uaverage += uabove_row[i];
+ Vaverage += vabove_row[i];
+ }
+ }
+
+ if (x->left_available)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ Uaverage += uleft_col[i];
+ Vaverage += vleft_col[i];
+ }
+ }
+
+ if (!x->up_available && !x->left_available)
+ {
+ expected_udc = 128;
+ expected_vdc = 128;
+ }
+ else
+ {
+ shift = 2 + x->up_available + x->left_available;
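+                /* shift is 3 or 4: a rounded average of the 8 or 16 available chroma boundary samples. */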
+ expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
+ expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
+ }
+
+
+ vpx_memset(upred_ptr, expected_udc, 64);
+ vpx_memset(vpred_ptr, expected_vdc, 64);
+
+
+ }
+ break;
+ case V_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ vpx_memcpy(upred_ptr, uabove_row, 8);
+ vpx_memcpy(vpred_ptr, vabove_row, 8);
+ upred_ptr += 8;
+ vpred_ptr += 8;
+ }
+
+ }
+ break;
+ case H_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ vpx_memset(upred_ptr, uleft_col[i], 8);
+ vpx_memset(vpred_ptr, vleft_col[i], 8);
+ upred_ptr += 8;
+ vpred_ptr += 8;
+ }
+ }
+
+ break;
+ case TM_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ int predu = uleft_col[i] + uabove_row[j] - utop_left;
+ int predv = vleft_col[i] + vabove_row[j] - vtop_left;
+
+ if (predu < 0)
+ predu = 0;
+
+ if (predu > 255)
+ predu = 255;
+
+ if (predv < 0)
+ predv = 0;
+
+ if (predv > 255)
+ predv = 255;
+
+ upred_ptr[j] = predu;
+ vpred_ptr[j] = predv;
+ }
+
+ upred_ptr += 8;
+ vpred_ptr += 8;
+ }
+
+ }
+ break;
+ case B_PRED:
+ case NEARESTMV:
+ case NEARMV:
+ case ZEROMV:
+ case NEWMV:
+ case SPLITMV:
+ case MB_MODE_COUNT:
+ break;
+ }
+#else
+ (void) pbi;
+ (void) x;
+ (void) mb_row;
+ (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+ unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */
+ unsigned char *uleft_col; /*[16];*/
+ unsigned char uleft_buf[8];
+ unsigned char utop_left; /* = uabove_row[-1]; */
+ unsigned char *vabove_row; /* = x->dst.v_buffer - x->dst.uv_stride; */
+ unsigned char *vleft_col; /*[20];*/
+ unsigned char vleft_buf[8];
+ unsigned char vtop_left; /* = vabove_row[-1]; */
+ unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+ unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
+ int uv_stride = x->dst.uv_stride;
+ int i, j;
+
+ if (pbi->common.filter_level)
+ {
+ uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16;
+ vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16;
+ uleft_col = pbi->mt_uleft_col[mb_row];
+ vleft_col = pbi->mt_vleft_col[mb_row];
+ } else
+ {
+ uabove_row = x->dst.u_buffer - x->dst.uv_stride;
+ vabove_row = x->dst.v_buffer - x->dst.uv_stride;
+
+ for (i = 0; i < 8; i++)
+ {
+ uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
+ vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
+ }
+ uleft_col = uleft_buf;
+ vleft_col = vleft_buf;
+ }
+ utop_left = uabove_row[-1];
+ vtop_left = vabove_row[-1];
+
+ switch (x->mode_info_context->mbmi.uv_mode)
+ {
+ case DC_PRED:
+ {
+ int expected_udc;
+ int expected_vdc;
+ int i;
+ int shift;
+ int Uaverage = 0;
+ int Vaverage = 0;
+
+ if (x->up_available)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ Uaverage += uabove_row[i];
+ Vaverage += vabove_row[i];
+ }
+ }
+
+ if (x->left_available)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ Uaverage += uleft_col[i];
+ Vaverage += vleft_col[i];
+ }
+ }
+
+ if (!x->up_available && !x->left_available)
+ {
+ expected_udc = 128;
+ expected_vdc = 128;
+ }
+ else
+ {
+ shift = 2 + x->up_available + x->left_available;
+ expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
+ expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
+ }
+
+
+ /*vpx_memset(upred_ptr,expected_udc,64);
+ vpx_memset(vpred_ptr,expected_vdc,64);*/
+ for (i = 0; i < 8; i++)
+ {
+ vpx_memset(upred_ptr, expected_udc, 8);
+ vpx_memset(vpred_ptr, expected_vdc, 8);
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
+ }
+ }
+ break;
+ case V_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ vpx_memcpy(upred_ptr, uabove_row, 8);
+ vpx_memcpy(vpred_ptr, vabove_row, 8);
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
+ }
+
+ }
+ break;
+ case H_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ vpx_memset(upred_ptr, uleft_col[i], 8);
+ vpx_memset(vpred_ptr, vleft_col[i], 8);
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
+ }
+ }
+
+ break;
+ case TM_PRED:
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ int predu = uleft_col[i] + uabove_row[j] - utop_left;
+ int predv = vleft_col[i] + vabove_row[j] - vtop_left;
+
+ if (predu < 0)
+ predu = 0;
+
+ if (predu > 255)
+ predu = 255;
+
+ if (predv < 0)
+ predv = 0;
+
+ if (predv > 255)
+ predv = 255;
+
+ upred_ptr[j] = predu;
+ vpred_ptr[j] = predv;
+ }
+
+ upred_ptr += uv_stride; /*8;*/
+ vpred_ptr += uv_stride; /*8;*/
+ }
+
+ }
+ break;
+ case B_PRED:
+ case NEARESTMV:
+ case NEARMV:
+ case ZEROMV:
+ case NEWMV:
+ case SPLITMV:
+ case MB_MODE_COUNT:
+ break;
+ }
+#else
+ (void) pbi;
+ (void) x;
+ (void) mb_row;
+ (void) mb_col;
+#endif
+}
+
+
+void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
+ MACROBLOCKD *xd,
+ int b_mode,
+ unsigned char *predictor,
+ int mb_row,
+ int mb_col,
+ int num)
+{
+#if CONFIG_MULTITHREAD
+ int i, r, c;
+
+ unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */
+ unsigned char Left[4];
+ unsigned char top_left; /* = Above[-1]; */
+
+ BLOCKD *x = &xd->block[num];
+
+    /* Caution: some b_modes need 8 above pixels (4 above + 4 above-right). */
+ if (num < 4 && pbi->common.filter_level)
+ Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32;
+ else
+ Above = *(x->base_dst) + x->dst - x->dst_stride;
+
+ if (num%4==0 && pbi->common.filter_level)
+ {
+ for (i=0; i<4; i++)
+ Left[i] = pbi->mt_yleft_col[mb_row][num + i];
+ }else
+ {
+ Left[0] = (*(x->base_dst))[x->dst - 1];
+ Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
+ Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
+ Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
+ }
+
+ if ((num==4 || num==8 || num==12) && pbi->common.filter_level)
+ top_left = pbi->mt_yleft_col[mb_row][num-1];
+ else
+ top_left = Above[-1];
+
+ switch (b_mode)
+ {
+ case B_DC_PRED:
+ {
+ int expected_dc = 0;
+
+ for (i = 0; i < 4; i++)
+ {
+ expected_dc += Above[i];
+ expected_dc += Left[i];
+ }
+
+ expected_dc = (expected_dc + 4) >> 3;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ predictor[c] = expected_dc;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_TM_PRED:
+ {
+ /* prediction similar to true_motion prediction */
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ int pred = Above[c] - top_left + Left[r];
+
+ if (pred < 0)
+ pred = 0;
+
+ if (pred > 255)
+ pred = 255;
+
+ predictor[c] = pred;
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+
+ case B_VE_PRED:
+ {
+
+ unsigned int ap[4];
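+        /* Smooth the above row with a rounded 1-2-1 filter, then replicate it down all four rows. */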
+ ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
+ ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
+ ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
+ ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+
+ predictor[c] = ap[c];
+ }
+
+ predictor += 16;
+ }
+
+ }
+ break;
+
+
+ case B_HE_PRED:
+ {
+
+ unsigned int lp[4];
+ lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
+ lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
+ lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
+ lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
+
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ predictor[c] = lp[r];
+ }
+
+ predictor += 16;
+ }
+ }
+ break;
+ case B_LD_PRED:
+ {
+ unsigned char *ptr = Above;
+ predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
+ predictor[0 * 16 + 1] =
+ predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
+ predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
+
+ }
+ break;
+ case B_RD_PRED:
+ {
+
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[3 * 16 + 2] =
+ predictor[2 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 3] =
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+
+ }
+ break;
+ case B_VR_PRED:
+ {
+
+ unsigned char pp[9];
+
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
+ predictor[3 * 16 + 2] =
+ predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
+ predictor[3 * 16 + 3] =
+ predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ predictor[2 * 16 + 3] =
+ predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
+ predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
+
+ }
+ break;
+ case B_VL_PRED:
+ {
+
+ unsigned char *pp = Above;
+
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[1 * 16 + 1] =
+ predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 1] =
+ predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[3 * 16 + 1] =
+ predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[0 * 16 + 3] =
+ predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+ case B_HD_PRED:
+ {
+ unsigned char pp[9];
+ pp[0] = Left[3];
+ pp[1] = Left[2];
+ pp[2] = Left[1];
+ pp[3] = Left[0];
+ pp[4] = top_left;
+ pp[5] = Above[0];
+ pp[6] = Above[1];
+ pp[7] = Above[2];
+ pp[8] = Above[3];
+
+
+ predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[2 * 16 + 0] =
+ predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[2 * 16 + 1] =
+ predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[2 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+ predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+ predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+ }
+ break;
+
+
+ case B_HU_PRED:
+ {
+ unsigned char *pp = Left;
+ predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+ predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+ predictor[0 * 16 + 2] =
+ predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
+ predictor[0 * 16 + 3] =
+ predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+ predictor[1 * 16 + 2] =
+ predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+ predictor[1 * 16 + 3] =
+ predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
+ predictor[2 * 16 + 2] =
+ predictor[2 * 16 + 3] =
+ predictor[3 * 16 + 0] =
+ predictor[3 * 16 + 1] =
+ predictor[3 * 16 + 2] =
+ predictor[3 * 16 + 3] = pp[3];
+ }
+ break;
+
+
+ }
+#else
+ (void) pbi;
+ (void) xd;
+ (void) b_mode;
+ (void) predictor;
+ (void) mb_row;
+ (void) mb_col;
+ (void) num;
+#endif
+}
+
+/* Copy 4 bytes from the above-right block down the right edge so that the 4x4
+ * prediction modes which use pixels above and to the right have valid pixels
+ * to work with.
+ */
+void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+ unsigned char *above_right; /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */
+ unsigned int *src_ptr;
+ unsigned int *dst_ptr0;
+ unsigned int *dst_ptr1;
+ unsigned int *dst_ptr2;
+
+ if (pbi->common.filter_level)
+ above_right = pbi->mt_yabove_row[mb_row] + mb_col*16 + 32 +16;
+ else
+ above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+
+ src_ptr = (unsigned int *)above_right;
+ /*dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
+ dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
+ dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);*/
+ dst_ptr0 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 3 * x->block[0].dst_stride);
+ dst_ptr1 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 7 * x->block[0].dst_stride);
+ dst_ptr2 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 11 * x->block[0].dst_stride);
+ *dst_ptr0 = *src_ptr;
+ *dst_ptr1 = *src_ptr;
+ *dst_ptr2 = *src_ptr;
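+    /* Rows 3, 7 and 11 are the last pixel rows of 4x4 block rows 0-2; writing the
+     * above-right word there gives the rightmost blocks in rows 1-3 valid
+     * above-right pixels.
+     */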
+#else
+ (void) pbi;
+ (void) x;
+ (void) mb_row;
+ (void) mb_col;
+#endif
+}
diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h
new file mode 100644
index 000000000..d401295b2
--- /dev/null
+++ b/vp8/decoder/reconintra_mt.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_RECONINTRA_MT_H
+#define __INC_RECONINTRA_MT_H
+
+/* reconintra functions used in multi-threaded decoder */
+#if CONFIG_MULTITHREAD
+extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+
+extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num);
+extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+#endif
+
+#endif
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index e35d1757f..fc2fad516 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,16 +1,20 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WIN32
# include <unistd.h>
#endif
+#ifdef __APPLE__
+#include <mach/mach_init.h>
+#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"
@@ -18,20 +22,22 @@
#include "loopfilter.h"
#include "extend.h"
#include "vpx_ports/vpx_timer.h"
+#include "detokenize.h"
+#include "reconinter.h"
+#include "reconintra_mt.h"
-extern void vp8_decode_mb_row(VP8D_COMP *pbi,
- VP8_COMMON *pc,
- int mb_row,
- MACROBLOCKD *xd);
-
+extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
+extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
-extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
+
+#if CONFIG_RUNTIME_CPU_DETECT
+#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
+#else
+#define RTCD_VTABLE(x) NULL
+#endif
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
-
-
-
#if CONFIG_MULTITHREAD
VP8_COMMON *const pc = & pbi->common;
int i, j;
@@ -42,15 +48,11 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
#if CONFIG_RUNTIME_CPU_DETECT
mbd->rtcd = xd->rtcd;
#endif
-
-
mbd->subpixel_predict = xd->subpixel_predict;
mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
- mbd->gf_active_ptr = xd->gf_active_ptr;
- mbd->mode_info = pc->mi - 1;
mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
mbd->mode_info_stride = pc->mode_info_stride;
@@ -58,11 +60,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
mbd->frames_since_golden = pc->frames_since_golden;
mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;
- mbd->pre = pc->last_frame;
- mbd->dst = pc->new_frame;
-
-
-
+ mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+ mbd->dst = pc->yv12_fb[pc->new_fb_idx];
vp8_setup_block_dptrs(mbd);
vp8_build_block_doffsets(mbd);
@@ -70,8 +69,14 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
- mbd->mbmi.mode = DC_PRED;
- mbd->mbmi.uv_mode = DC_PRED;
+ /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
+ vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
+ /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
+ vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
+ /*unsigned char mode_ref_lf_delta_enabled;
+ unsigned char mode_ref_lf_delta_update;*/
+ mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
+ mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
mbd->current_bc = &pbi->bc2;
@@ -81,6 +86,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
}
}
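+    /* No row has decoded any column yet. */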
+ for (i=0; i< pc->mb_rows; i++)
+ pbi->mt_current_mb_col[i]=-1;
#else
(void) pbi;
(void) xd;
@@ -90,348 +97,390 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
}
-THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
+void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
#if CONFIG_MULTITHREAD
- int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
- VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
- MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
- ENTROPY_CONTEXT mb_row_left_context[4][4];
+ int eobtotal = 0;
+ int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+ VP8_COMMON *pc = &pbi->common;
- while (1)
+ if (xd->mode_info_context->mbmi.mb_skip_coeff)
{
- if (pbi->b_multithreaded_rd == 0)
- break;
-
- //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
- if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
- {
- if (pbi->b_multithreaded_rd == 0)
- break;
- else
- {
- VP8_COMMON *pc = &pbi->common;
- int mb_row = mbrd->mb_row;
- MACROBLOCKD *xd = &mbrd->mbd;
-
- //printf("ithread:%d mb_row %d\n", ithread, mb_row);
- int i;
- int recon_yoffset, recon_uvoffset;
- int mb_col;
- int recon_y_stride = pc->last_frame.y_stride;
- int recon_uv_stride = pc->last_frame.uv_stride;
-
- volatile int *last_row_current_mb_col;
-
- if (ithread > 0)
- last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
- else
- last_row_current_mb_col = &pbi->current_mb_col_main;
-
- recon_yoffset = mb_row * recon_y_stride * 16;
- recon_uvoffset = mb_row * recon_uv_stride * 8;
- // reset above block coeffs
-
- xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
- xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
- xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
- xd->left_context = mb_row_left_context;
- vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
- xd->up_available = (mb_row != 0);
-
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
- for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
- {
-
- while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
- {
- x86_pause_hint();
- thread_sleep(0);
- }
-
- // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
- vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
-
- if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
- {
- for (i = 0; i < 16; i++)
- {
- BLOCKD *d = &xd->block[i];
- vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
- }
- }
-
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-
- xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
-
- xd->left_available = (mb_col != 0);
-
- // Select the appropriate reference frame for this MB
- if (xd->mbmi.ref_frame == LAST_FRAME)
- {
- xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
- }
- else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
- {
- // Golden frame reconstruction buffer
- xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
- }
- else
- {
- // Alternate reference frame reconstruction buffer
- xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
- }
-
- vp8_build_uvmvs(xd, pc->full_pixel);
-
- vp8dx_bool_decoder_fill(xd->current_bc);
- vp8_decode_macroblock(pbi, xd);
-
+ vp8_reset_mb_tokens_context(xd);
+ }
+ else
+ {
+ eobtotal = vp8_decode_mb_tokens(pbi, xd);
+ }
- recon_yoffset += 16;
- recon_uvoffset += 8;
+ /* Perform temporary clamping of the MV to be used for prediction */
+ if (do_clamp)
+ {
+ clamp_mvs(xd);
+ }
- ++xd->mode_info_context; /* next mb */
+ xd->mode_info_context->mbmi.dc_diff = 1;
- xd->gf_active_ptr++; // GF useage flag for next MB
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ {
+ xd->mode_info_context->mbmi.dc_diff = 0;
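+        /* All-zero residual: the prediction alone reconstructs this MB, so predict straight into the frame and return. */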
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
- pbi->mb_row_di[ithread].current_mb_col = mb_col;
+ /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ {
+ vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
+ vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
+ }
+ else
+ {
+ vp8_build_inter_predictors_mb_s(xd);
+ }
+ return;
+ }
- }
+ if (xd->segmentation_enabled)
+ mb_init_dequantizer(pbi, xd);
- // adjust to the next row of mbs
- vp8_extend_mb_row(
- &pc->new_frame,
- xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
- );
+ /* do prediction */
+ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ {
+ vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
- ++xd->mode_info_context; /* skip prediction column */
+ if (xd->mode_info_context->mbmi.mode != B_PRED)
+ {
+ vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
+ } else {
+ vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
+ }
+ }
+ else
+ {
+ vp8_build_inter_predictors_mb(xd);
+ }
- // since we have multithread
- xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ /* dequantization and idct */
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+ {
+ BLOCKD *b = &xd->block[24];
+ DEQUANT_INVOKE(&pbi->dequant, block)(b);
- //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
- if ((mb_row & 1) == 1)
- {
- pbi->last_mb_row_decoded = mb_row;
- //printf("S%d", pbi->last_mb_row_decoded);
- }
+            /* Do the 2nd order inverse transform on block 24, which holds the 16 luma DC coefficients; the result in b->diff feeds dc_idct_add_y_block below. */
+ if (xd->eobs[24] > 1)
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ ((int *)b->qcoeff)[1] = 0;
+ ((int *)b->qcoeff)[2] = 0;
+ ((int *)b->qcoeff)[3] = 0;
+ ((int *)b->qcoeff)[4] = 0;
+ ((int *)b->qcoeff)[5] = 0;
+ ((int *)b->qcoeff)[6] = 0;
+ ((int *)b->qcoeff)[7] = 0;
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ }
- if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
- {
- //SetEvent(pbi->h_event_main);
- sem_post(&pbi->h_event_main);
+ DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs, xd->block[24].diff);
+ }
+ else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *b = &xd->block[i];
+ vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
- }
+ if (xd->eobs[i] > 1)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ (b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+ (b->qcoeff[0] * b->dequant[0], b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride);
+ ((int *)b->qcoeff)[0] = 0;
}
}
}
+ else
+ {
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ (xd->qcoeff, xd->block[0].dequant,
+ xd->predictor, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->eobs);
+ }
+ DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ (xd->qcoeff+16*16, xd->block[16].dequant,
+ xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->eobs+16);
#else
- (void) p_data;
+ (void) pbi;
+ (void) xd;
+ (void) mb_row;
+ (void) mb_col;
#endif
-
- return 0 ;
}
-THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
+
+THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
- VP8D_COMP *pbi = (VP8D_COMP *)p_data;
+ int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
+ VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
+ MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
+ ENTROPY_CONTEXT_PLANES mb_row_left_context;
while (1)
{
- if (pbi->b_multithreaded_lf == 0)
+ if (pbi->b_multithreaded_rd == 0)
break;
- //printf("before waiting for start_lpf\n");
-
- //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
- if (sem_wait(&pbi->h_event_start_lpf) == 0)
+ /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
+ if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
{
- if (pbi->b_multithreaded_lf == 0) // we're shutting down
+ if (pbi->b_multithreaded_rd == 0)
break;
else
{
+ VP8_COMMON *pc = &pbi->common;
+ MACROBLOCKD *xd = &mbrd->mbd;
- VP8_COMMON *cm = &pbi->common;
- MACROBLOCKD *mbd = &pbi->lpfmb;
- int default_filt_lvl = pbi->common.filter_level;
+ int mb_row;
+ int num_part = 1 << pbi->common.multi_token_partition;
+ volatile int *last_row_current_mb_col;
+ int nsync = pbi->sync_range;
- YV12_BUFFER_CONFIG *post = &cm->new_frame;
- loop_filter_info *lfi = cm->lf_info;
+ for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
+ {
+ int i;
+ int recon_yoffset, recon_uvoffset;
+ int mb_col;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- int mb_row;
- int mb_col;
+ int filter_level;
+ loop_filter_info *lfi = pc->lf_info;
+ int alt_flt_enabled = xd->segmentation_enabled;
+ int Segment;
+ pbi->mb_row_di[ithread].mb_row = mb_row;
+ pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
- int baseline_filter_level[MAX_MB_SEGMENTS];
- int filter_level;
- int alt_flt_enabled = mbd->segmentation_enabled;
+ last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
- int i;
- unsigned char *y_ptr, *u_ptr, *v_ptr;
+ recon_yoffset = mb_row * recon_y_stride * 16;
+ recon_uvoffset = mb_row * recon_uv_stride * 8;
+ /* reset above block coeffs */
- volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
+ xd->above_context = pc->above_context;
+ xd->left_context = &mb_row_left_context;
+ vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
+ xd->up_available = (mb_row != 0);
- //MODE_INFO * this_mb_mode_info = cm->mi;
- mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
+ xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
- // Note the baseline filter values for each segment
- if (alt_flt_enabled)
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
{
- if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- else
+ if ((mb_col & (nsync-1)) == 0)
{
- baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
- baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
+ {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
}
- }
- }
- else
- {
- for (i = 0; i < MAX_MB_SEGMENTS; i++)
- baseline_filter_level[i] = default_filt_lvl;
- }
- // Initialize the loop filter for this frame.
- vp8_init_loop_filter(cm);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *d = &xd->block[i];
+ vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+ }
+ }
- // Set up the buffer pointers
- y_ptr = post->y_buffer;
- u_ptr = post->u_buffer;
- v_ptr = post->v_buffer;
+ if(pbi->common.filter_level)
+ {
+                        /* Update the loop filter level for this MB. */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+                        /* Apply any context-driven MB-level adjustment. */
+ vp8_adjust_mb_lf_value(xd, &filter_level);
+ }
- // vp8_filter each macro block
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
- {
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ */
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- while (mb_row >= *last_mb_row_decoded)
- {
- x86_pause_hint();
- thread_sleep(0);
- }
-
- //printf("R%d", mb_row);
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
- filter_level = baseline_filter_level[Segment];
+ xd->left_available = (mb_col != 0);
- // Apply any context driven MB level adjustment
- vp8_adjust_mb_lf_value(mbd, &filter_level);
-
- if (filter_level)
- {
- if (mb_col > 0)
- cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
+ else
+ ref_fb_idx = pc->alt_fb_idx;
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
- cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
- // don't apply across umv border
- if (mb_row > 0)
- cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ vp8_build_uvmvs(xd, pc->full_pixel);
+ vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
- cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ if (pbi->common.filter_level)
+ {
+ if( mb_row != pc->mb_rows-1 )
+ {
+                        /* Save this MB's unfiltered bottom rows: the loop filter will overwrite them, but the row below needs the unfiltered values for intra prediction. */
+ vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+ vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+ vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+ }
+
+ /* save left_col for next MB decoding */
+ if(mb_col != pc->mb_cols-1)
+ {
+ MODE_INFO *next = xd->mode_info_context +1;
+
+ if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
+ {
+ for (i = 0; i < 16; i++)
+ pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
+ for (i = 0; i < 8; i++)
+ {
+ pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
+ pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
+ }
+ }
+ }
+
+ /* loopfilter on this macroblock. */
+ if (filter_level)
+ {
+ if (mb_col > 0)
+ pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+ }
}
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
+ recon_yoffset += 16;
+ recon_uvoffset += 8;
+
+ ++xd->mode_info_context; /* next mb */
- mbd->mode_info_context++; // step to next MB
+ xd->above_context++;
+ /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
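+                    /* Publish this row's progress so the thread decoding the row below can advance. */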
+ pbi->mt_current_mb_col[mb_row] = mb_col;
}
- y_ptr += post->y_stride * 16 - post->y_width;
- u_ptr += post->uv_stride * 8 - post->uv_width;
- v_ptr += post->uv_stride * 8 - post->uv_width;
+ /* adjust to the next row of mbs */
+ if (pbi->common.filter_level)
+ {
+ if(mb_row != pc->mb_rows-1)
+ {
+ int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+ int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
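+                        /* Replicate the last saved pixel 4 columns to the right so above-right reads past the frame edge stay valid. */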
+
+ for (i = 0; i < 4; i++)
+ {
+ pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
+ pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
+ pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
+ }
+ }
+ } else
+ vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
- mbd->mode_info_context++; // Skip border mb
- }
+ ++xd->mode_info_context; /* skip prediction column */
- //printf("R%d\n", mb_row);
- // When done, signal main thread that ME is finished
- //SetEvent(pbi->h_event_lpf);
- sem_post(&pbi->h_event_lpf);
+ /* since we have multithread */
+ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ }
}
-
+ }
+    /* Signal the main thread once the frame's last decoded row is finished. */
+ if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+ {
+ /*SetEvent(pbi->h_event_end_decoding);*/
+ sem_post(&pbi->h_event_end_decoding);
}
}
-
#else
(void) p_data;
#endif
- return 0;
+
+    return 0;
}
+
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
int core_count = 0;
int ithread;
+ int i;
pbi->b_multithreaded_rd = 0;
- pbi->b_multithreaded_lf = 0;
pbi->allocated_decoding_thread_count = 0;
- core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
- if (core_count > 1)
- {
- sem_init(&pbi->h_event_lpf, 0, 0);
- sem_init(&pbi->h_event_start_lpf, 0, 0);
- pbi->b_multithreaded_lf = 1;
- pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
- }
+ core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
if (core_count > 1)
{
pbi->b_multithreaded_rd = 1;
- pbi->decoding_thread_count = core_count - 1;
+ pbi->decoding_thread_count = core_count -1;
CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
- CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+ CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
{
- sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
+ sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
pbi->de_thread_data[ithread].ithread = ithread;
pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
-
}
- sem_init(&pbi->h_event_main, 0, 0);
+ sem_init(&pbi->h_event_end_decoding, 0, 0);
+
pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
}
@@ -440,45 +489,196 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
#endif
}
-void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+
+void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
#if CONFIG_MULTITHREAD
+    int i;
- if (pbi->b_multithreaded_lf)
- {
- pbi->b_multithreaded_lf = 0;
- sem_post(&pbi->h_event_start_lpf);
- pthread_join(pbi->h_thread_lpf, 0);
- sem_destroy(&pbi->h_event_start_lpf);
- }
-
- //shutdown MB Decoding thread;
if (pbi->b_multithreaded_rd)
{
- pbi->b_multithreaded_rd = 0;
- // allow all threads to exit
+ if (pbi->mt_current_mb_col)
{
- int i;
+ vpx_free(pbi->mt_current_mb_col);
+ pbi->mt_current_mb_col = NULL ;
+ }
- for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ /* Free above_row buffers. */
+ if (pbi->mt_yabove_row)
+ {
+ for (i=0; i< mb_rows; i++)
{
+ if (pbi->mt_yabove_row[i])
+ {
+ vpx_free(pbi->mt_yabove_row[i]);
+ pbi->mt_yabove_row[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_yabove_row);
+ pbi->mt_yabove_row = NULL ;
+ }
- sem_post(&pbi->h_event_mbrdecoding[i]);
- pthread_join(pbi->h_decoding_thread[i], NULL);
+ if (pbi->mt_uabove_row)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_uabove_row[i])
+ {
+ vpx_free(pbi->mt_uabove_row[i]);
+ pbi->mt_uabove_row[i] = NULL ;
+ }
}
+ vpx_free(pbi->mt_uabove_row);
+ pbi->mt_uabove_row = NULL ;
}
+
+ if (pbi->mt_vabove_row)
{
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_vabove_row[i])
+ {
+ vpx_free(pbi->mt_vabove_row[i]);
+ pbi->mt_vabove_row[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_vabove_row);
+ pbi->mt_vabove_row = NULL ;
+ }
- int i;
- for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ /* Free left_col buffers. */
+ if (pbi->mt_yleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_yleft_col[i])
+ {
+ vpx_free(pbi->mt_yleft_col[i]);
+ pbi->mt_yleft_col[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_yleft_col);
+ pbi->mt_yleft_col = NULL ;
+ }
+
+ if (pbi->mt_uleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
{
- sem_destroy(&pbi->h_event_mbrdecoding[i]);
+ if (pbi->mt_uleft_col[i])
+ {
+ vpx_free(pbi->mt_uleft_col[i]);
+ pbi->mt_uleft_col[i] = NULL ;
+ }
}
+ vpx_free(pbi->mt_uleft_col);
+ pbi->mt_uleft_col = NULL ;
+ }
+
+ if (pbi->mt_vleft_col)
+ {
+ for (i=0; i< mb_rows; i++)
+ {
+ if (pbi->mt_vleft_col[i])
+ {
+ vpx_free(pbi->mt_vleft_col[i]);
+ pbi->mt_vleft_col[i] = NULL ;
+ }
+ }
+ vpx_free(pbi->mt_vleft_col);
+ pbi->mt_vleft_col = NULL ;
+ }
+ }
+#else
+    (void) pbi;
+    (void) mb_rows;
+#endif
+}
+
+int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
+{
+#if CONFIG_MULTITHREAD
+ VP8_COMMON *const pc = & pbi->common;
+ int i;
+ int uv_width;
+
+ if (pbi->b_multithreaded_rd)
+ {
+ vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
+
+ /* our internal buffers are always multiples of 16 */
+ if ((width & 0xf) != 0)
+ width += 16 - (width & 0xf);
+
+ if (width < 640) pbi->sync_range = 1;
+ else if (width <= 1280) pbi->sync_range = 8;
+        else if (width <= 2560) pbi->sync_range = 16;
+ else pbi->sync_range = 32;
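+        /* sync_range must be a power of two ((mb_col & (nsync-1)) below); wider frames poll the row above less often to bound spin-wait overhead. */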
+
+ uv_width = width >>1;
+
+ /* Allocate an int for each mb row. */
+ CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
+
+ /* Allocate memory for above_row buffers. */
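+        /* Luma rows are padded by VP8BORDERINPIXELS on each side; chroma rows by half that. */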
+ CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1)), 1));
+
+ CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+ CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+ /* Allocate memory for left_col buffers. */
+ CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
+
+ CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+
+ CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+ for (i = 0; i < pc->mb_rows; i++)
+ CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+ }
+ return 0;
+#else
+ (void) pbi;
+ (void) width;
+ (void) prev_mb_rows;
+ return 0;
+#endif
+}
+
+
+void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+ /* shut down the MB decoding threads */
+ if (pbi->b_multithreaded_rd)
+ {
+ int i;
+
+ pbi->b_multithreaded_rd = 0;
+
+ /* allow all threads to exit */
+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ {
+ sem_post(&pbi->h_event_start_decoding[i]);
+ pthread_join(pbi->h_decoding_thread[i], NULL);
+ }
+
+ for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+ {
+ sem_destroy(&pbi->h_event_start_decoding[i]);
}
- sem_destroy(&pbi->h_event_main);
+ sem_destroy(&pbi->h_event_end_decoding);
if (pbi->h_decoding_thread)
{
@@ -486,10 +686,10 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
pbi->h_decoding_thread = NULL;
}
- if (pbi->h_event_mbrdecoding)
+ if (pbi->h_event_start_decoding)
{
- vpx_free(pbi->h_event_mbrdecoding);
- pbi->h_event_mbrdecoding = NULL;
+ vpx_free(pbi->h_event_start_decoding);
+ pbi->h_event_start_decoding = NULL;
}
if (pbi->mb_row_di)
@@ -504,43 +704,65 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
pbi->de_thread_data = NULL;
}
}
-
#else
(void) pbi;
#endif
}
-void vp8_start_lfthread(VP8D_COMP *pbi)
+void vp8mt_lpf_init(VP8D_COMP *pbi, int default_filt_lvl)
{
#if CONFIG_MULTITHREAD
- memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
- pbi->last_mb_row_decoded = 0;
- sem_post(&pbi->h_event_start_lpf);
-#else
- (void) pbi;
-#endif
-}
-
-void vp8_stop_lfthread(VP8D_COMP *pbi)
-{
-#if CONFIG_MULTITHREAD
- struct vpx_usec_timer timer;
-
- vpx_usec_timer_start(&timer);
-
- sem_wait(&pbi->h_event_lpf);
+ VP8_COMMON *cm = &pbi->common;
+ MACROBLOCKD *mbd = &pbi->mb;
+ /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
+ loop_filter_info *lfi = cm->lf_info;
+ FRAME_TYPE frame_type = cm->frame_type;
+
+ /*int mb_row;
+ int mb_col;
+ int baseline_filter_level[MAX_MB_SEGMENTS];*/
+ int filter_level;
+ int alt_flt_enabled = mbd->segmentation_enabled;
+
+ int i;
+ /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
+
+ /* Note the baseline filter values for each segment */
+ if (alt_flt_enabled)
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ {
+ /* Abs value */
+ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ /* Delta Value */
+ else
+ {
+ pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ /* Clamp to valid range */
+ if (pbi->mt_baseline_filter_level[i] < 0)
+ pbi->mt_baseline_filter_level[i] = 0;
+ else if (pbi->mt_baseline_filter_level[i] > MAX_LOOP_FILTER)
+ pbi->mt_baseline_filter_level[i] = MAX_LOOP_FILTER;
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ pbi->mt_baseline_filter_level[i] = default_filt_lvl;
+ }
- vpx_usec_timer_mark(&timer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
+ /* Initialize the loop filter for this frame. */
+ if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+ vp8_init_loop_filter(cm);
+ else if (frame_type != cm->last_frame_type)
+ vp8_frame_init_loop_filter(lfi, frame_type);
#else
(void) pbi;
+ (void) default_filt_lvl;
#endif
}
-void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
- MACROBLOCKD *xd)
+void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
int mb_row;
@@ -548,47 +770,212 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
int ibc = 0;
int num_part = 1 << pbi->common.multi_token_partition;
+ int i, j;
+ volatile int *last_row_current_mb_col = NULL;
+ int nsync = pbi->sync_range;
+
+ int filter_level;
+ loop_filter_info *lfi = pc->lf_info;
+ int alt_flt_enabled = xd->segmentation_enabled;
+ int Segment;
+
+ if (pbi->common.filter_level)
+ {
+ /* Set above_row buffer to 127 for decoding first MB row */
+ vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
+ vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);
+ vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);
+
+ for (i = 1; i < pc->mb_rows; i++)
+ {
+ vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS - 1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
+ vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
+ }
+
+ /* Set left_col to 129 initially */
+ for (i = 0; i < pc->mb_rows; i++)
+ {
+ vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
+ vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
+ vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
+ }
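+ /* 127 (above) and 129 (left) are the predictor defaults VP8 assumes
+ * for missing neighbours, so edge macroblocks reconstruct exactly as
+ * in the single-threaded decoder
+ */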
+ vp8mt_lpf_init(pbi, pc->filter_level);
+ }
vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+ for (i = 0; i < pbi->decoding_thread_count; i++)
+ sem_post(&pbi->h_event_start_decoding[i]);
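+ /* wake the workers; this (main) thread decodes every
+ * (decoding_thread_count + 1)-th row itself in the loop below
+ */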
+
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
{
int i;
- pbi->current_mb_col_main = -1;
-
- xd->current_bc = &pbi->mbc[ibc];
- ibc++ ;
- if (ibc == num_part)
- ibc = 0;
+ xd->current_bc = &pbi->mbc[mb_row % num_part];
- for (i = 0; i < pbi->decoding_thread_count; i++)
+ /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
{
- if ((mb_row + i + 1) >= pc->mb_rows)
- break;
+ int i;
+ int recon_yoffset, recon_uvoffset;
+ int mb_col;
+ int ref_fb_idx = pc->lst_fb_idx;
+ int dst_fb_idx = pc->new_fb_idx;
+ int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
- pbi->mb_row_di[i].mb_row = mb_row + i + 1;
- pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
- ibc++;
+ /* volatile int *last_row_current_mb_col = NULL; */
+ if (mb_row > 0)
+ last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
- if (ibc == num_part)
- ibc = 0;
+ vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
+ recon_yoffset = mb_row * recon_y_stride * 16;
+ recon_uvoffset = mb_row * recon_uv_stride * 8;
+ /* reset above block coeffs */
- pbi->mb_row_di[i].current_mb_col = -1;
- sem_post(&pbi->h_event_mbrdecoding[i]);
- }
+ xd->above_context = pc->above_context;
+ xd->up_available = (mb_row != 0);
- vp8_decode_mb_row(pbi, pc, mb_row, xd);
+ xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
- xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+ for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+ {
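+ /* stall until the row above is at least nsync columns ahead of this
+ * position, since its reconstructed pixels feed this row's prediction
+ */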
+ if (mb_row > 0 && (mb_col & (nsync - 1)) == 0) {
+ while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
+ {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
+ }
- if (mb_row < pc->mb_rows - 1)
- {
- sem_wait(&pbi->h_event_main);
+ if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ BLOCKD *d = &xd->block[i];
+ vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+ }
+ }
+
+ if (pbi->common.filter_level)
+ {
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Apply any context-driven MB-level adjustment */
+ vp8_adjust_mb_lf_value(xd, &filter_level);
+ }
+
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ */
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+ xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+ xd->left_available = (mb_col != 0);
+
+ /* Select the appropriate reference frame for this MB */
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = pc->lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = pc->gld_fb_idx;
+ else
+ ref_fb_idx = pc->alt_fb_idx;
+
+ xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ vp8_build_uvmvs(xd, pc->full_pixel);
+ vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
+
+ if (pbi->common.filter_level)
+ {
+ /* Save decoded MB last row data for next-row decoding */
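+ /* (the loop filter below runs in place, so the next row must
+ * predict from these saved, unfiltered copies)
+ */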
+ if (mb_row != pc->mb_rows - 1)
+ {
+ vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+ vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+ vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+ }
+
+ /* save left_col for next MB decoding */
+ if (mb_col != pc->mb_cols - 1)
+ {
+ MODE_INFO *next = xd->mode_info_context + 1;
+
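+ /* only an intra-coded neighbour reads the left column, so the
+ * copy can be skipped otherwise
+ */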
+ if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
+ {
+ for (i = 0; i < 16; i++)
+ pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
+ for (i = 0; i < 8; i++)
+ {
+ pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
+ pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
+ }
+ }
+ }
+
+ /* loopfilter on this macroblock. */
+ if (filter_level)
+ {
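+ /* VP8 edge order: MB left edge, inner vertical edges, MB top
+ * edge, inner horizontal edges; inner edges are skipped when the
+ * MB carries no coded residual (dc_diff == 0)
+ */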
+ if (mb_col > 0)
+ pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ /* don't apply across umv border */
+ if (mb_row > 0)
+ pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+ if (xd->mode_info_context->mbmi.dc_diff > 0)
+ pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+ }
+ }
+
+ recon_yoffset += 16;
+ recon_uvoffset += 8;
+
+ ++xd->mode_info_context; /* next mb */
+
+ xd->above_context++;
+
+ pbi->mt_current_mb_col[mb_row] = mb_col;
+ }
+
+ /* adjust to the next row of mbs */
+ if (pbi->common.filter_level)
+ {
+ if (mb_row != pc->mb_rows - 1)
+ {
+ int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+ int lastuv = (pc->yv12_fb[ref_fb_idx].y_width >> 1) + (VP8BORDERINPIXELS >> 1);
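+
+ /* replicate the last decoded pixel into the right border of the
+ * saved rows, mirroring what vp8_extend_mb_row does for the frame
+ * buffer in the unfiltered path below
+ */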
+
+ for (i = 0; i < 4; i++)
+ {
+ pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
+ pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
+ pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
+ }
+ }
+ } else
+ vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+
+ ++xd->mode_info_context; /* skip prediction column */
}
+ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
}
- pbi->last_mb_row_decoded = mb_row;
+ sem_wait(&pbi->h_event_end_decoding); /* wait, once per frame, until all rows are decoded */
#else
(void) pbi;
(void) xd;
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index eb10e2460..277842896 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 02be4872e..0d6133a46 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -49,12 +50,12 @@ sym(vp8_dequantize_b_impl_mmx):
ret
-;void dequant_idct_mmx(short *input, short *dq, short *output, int pitch)
-global sym(vp8_dequant_idct_mmx)
-sym(vp8_dequant_idct_mmx):
+;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+global sym(vp8_dequant_idct_add_mmx)
+sym(vp8_dequant_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
@@ -76,7 +77,8 @@ sym(vp8_dequant_idct_mmx):
movq mm3, [rax+24]
pmullw mm3, [rdx+24]
- mov rdx, arg(2) ;output
+ mov rdx, arg(3) ;dest
+ mov rsi, arg(2) ;pred
pxor mm7, mm7
@@ -87,7 +89,8 @@ sym(vp8_dequant_idct_mmx):
movq [rax+24],mm7
- movsxd rax, dword ptr arg(3) ;pitch
+ movsxd rax, dword ptr arg(4) ;pitch
+ movsxd rdi, dword ptr arg(5) ;stride
psubw mm0, mm2 ; b1= 0-2
paddw mm2, mm2 ;
@@ -95,11 +98,11 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -107,10 +110,10 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -150,11 +153,11 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -162,16 +165,16 @@ sym(vp8_dequant_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -206,13 +209,34 @@ sym(vp8_dequant_idct_mmx):
punpckldq mm2, mm4 ; 32 22 12 02
punpckhdq mm5, mm4 ; 33 23 13 03
- movq [rdx], mm0
+ pxor mm7, mm7
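+ ; per output row below: widen 4 prediction bytes to words, add the
+ ; IDCT result, saturate-pack back to bytes -- the new "add" step,
+ ; i.e. dest = clamp(pred + residual, 0, 255)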
- movq [rdx+rax], mm1
- movq [rdx+rax*2], mm2
+ movd mm4, [rsi]
+ punpcklbw mm4, mm7
+ paddsw mm0, mm4
+ packuswb mm0, mm7
+ movd [rdx], mm0
- add rdx, rax
- movq [rdx+rax*2], mm5
+ movd mm4, [rsi+rax]
+ punpcklbw mm4, mm7
+ paddsw mm1, mm4
+ packuswb mm1, mm7
+ movd [rdx+rdi], mm1
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm2, mm4
+ packuswb mm2, mm7
+ movd [rdx+rdi*2], mm2
+
+ add rdx, rdi
+ add rsi, rax
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm5, mm4
+ packuswb mm5, mm7
+ movd [rdx+rdi*2], mm5
; begin epilog
pop rdi
@@ -223,12 +247,12 @@ sym(vp8_dequant_idct_mmx):
ret
-;void dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc)
-global sym(vp8_dequant_dc_idct_mmx)
-sym(vp8_dequant_dc_idct_mmx):
+;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+global sym(vp8_dequant_dc_idct_add_mmx)
+sym(vp8_dequant_dc_idct_add_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
+ SHADOW_ARGS_TO_STACK 7
GET_GOT rbx
push rsi
push rdi
@@ -237,8 +261,6 @@ sym(vp8_dequant_dc_idct_mmx):
mov rax, arg(0) ;input
mov rdx, arg(1) ;dq
- movsxd rcx, dword ptr arg(4) ;Dc
-
movq mm0, [rax ]
pmullw mm0, [rdx]
@@ -251,7 +273,8 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm3, [rax+24]
pmullw mm3, [rdx+24]
- mov rdx, arg(2) ;output
+ mov rdx, arg(3) ;dest
+ mov rsi, arg(2) ;pred
pxor mm7, mm7
@@ -261,8 +284,15 @@ sym(vp8_dequant_dc_idct_mmx):
movq [rax+16],mm7
movq [rax+24],mm7
- pinsrw mm0, rcx, 0
- movsxd rax, dword ptr arg(3) ;pitch
+ ; move lower word of Dc to lower word of mm0
+ psrlq mm0, 16
+ movzx rcx, word ptr arg(6) ;Dc
+ psllq mm0, 16
+ movq mm7, rcx
+ por mm0, mm7
+
+ movsxd rax, dword ptr arg(4) ;pitch
+ movsxd rdi, dword ptr arg(5) ;stride
psubw mm0, mm2 ; b1= 0-2
paddw mm2, mm2 ;
@@ -270,11 +300,11 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -282,10 +312,10 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
@@ -325,11 +355,11 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
paddw mm2, mm0 ; a1 =0+2
- pmulhw mm5, [x_s1sqr2 GLOBAL];
+ pmulhw mm5, [GLOBAL(x_s1sqr2)];
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
movq mm7, mm3 ;
- pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+ pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
psubw mm7, mm5 ; c1
@@ -337,16 +367,16 @@ sym(vp8_dequant_dc_idct_mmx):
movq mm5, mm1
movq mm4, mm3
- pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
paddw mm5, mm1
- pmulhw mm3, [x_s1sqr2 GLOBAL]
+ pmulhw mm3, [GLOBAL(x_s1sqr2)]
paddw mm3, mm4
paddw mm3, mm5 ; d1
- paddw mm0, [fours GLOBAL]
+ paddw mm0, [GLOBAL(fours)]
- paddw mm2, [fours GLOBAL]
+ paddw mm2, [GLOBAL(fours)]
movq mm6, mm2 ; a1
movq mm4, mm0 ; b1
@@ -381,13 +411,34 @@ sym(vp8_dequant_dc_idct_mmx):
punpckldq mm2, mm4 ; 32 22 12 02
punpckhdq mm5, mm4 ; 33 23 13 03
- movq [rdx], mm0
-
- movq [rdx+rax], mm1
- movq [rdx+rax*2], mm2
-
- add rdx, rax
- movq [rdx+rax*2], mm5
+ pxor mm7, mm7
+
+ movd mm4, [rsi]
+ punpcklbw mm4, mm7
+ paddsw mm0, mm4
+ packuswb mm0, mm7
+ movd [rdx], mm0
+
+ movd mm4, [rsi+rax]
+ punpcklbw mm4, mm7
+ paddsw mm1, mm4
+ packuswb mm1, mm7
+ movd [rdx+rdi], mm1
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm2, mm4
+ packuswb mm2, mm7
+ movd [rdx+rdi*2], mm2
+
+ add rdx, rdi
+ add rsi, rax
+
+ movd mm4, [rsi+2*rax]
+ punpcklbw mm4, mm7
+ paddsw mm5, mm4
+ packuswb mm5, mm7
+ movd [rdx+rdi*2], mm5
; begin epilog
pop rdi
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 5def406d3..dc68daab3 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,19 +21,48 @@
*/
#if HAVE_MMX
extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct(vp8_dequant_idct_mmx);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx);
-
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_mmx
-#undef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_mmx
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
+
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
+
+#endif
+#endif
+
+#if HAVE_SSE2
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
-#undef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
#endif
#endif
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
new file mode 100644
index 000000000..78c91d3d2
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
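+/* An eob greater than 1 means the block has AC coefficients and needs the
+ * full dequant+IDCT+add; otherwise only the DC term can be non-zero and the
+ * cheaper DC-only kernel suffices. Where the DC is taken from q, a single
+ * 32-bit store then clears it, since the DC-only kernel does not zero its
+ * input.
+ */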
+void vp8_dequant_dc_idct_add_y_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
+
+ if (eobs[1] > 1)
+ vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
+
+ if (eobs[2] > 1)
+ vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
+
+ if (eobs[3] > 1)
+ vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+ else
+ vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ if (eobs[2] > 1)
+ vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
+ ((int *)(q+32))[0] = 0;
+ }
+
+ if (eobs[3] > 1)
+ vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
+ ((int *)(q+48))[0] = 0;
+ }
+
+ q += 64;
+ pre += 64;
+ dst += 4*stride;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_mmx
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstu += 4*stride;
+ eobs += 2;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ if (eobs[0] > 1)
+ vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
+ ((int *)q)[0] = 0;
+ }
+
+ if (eobs[1] > 1)
+ vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
+ else
+ {
+ vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+ ((int *)(q+16))[0] = 0;
+ }
+
+ q += 32;
+ pre += 32;
+ dstv += 4*stride;
+ eobs += 2;
+ }
+}
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
new file mode 100644
index 000000000..0273d6ed2
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void idct_dequant_dc_0_2x_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int dst_stride, short *dc);
+void idct_dequant_dc_full_2x_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int dst_stride, short *dc);
+
+void idct_dequant_0_2x_sse2
+ (short *q, short *dq ,unsigned char *pre,
+ unsigned char *dst, int dst_stride, int blk_stride);
+void idct_dequant_full_2x_sse2
+ (short *q, short *dq ,unsigned char *pre,
+ unsigned char *dst, int dst_stride, int blk_stride);
+
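+/* Each SSE2 kernel handles two adjacent 4x4 blocks per call. Reading two
+ * char eobs as one short and masking with 0xfefe tests eob > 1 for both
+ * blocks at once, because eob values 0 and 1 only ever set bit 0 of a byte.
+ */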
+void vp8_dequant_dc_idct_add_y_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs, short *dc)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (((short *)(eobs))[0] & 0xfefe)
+ idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+ else
+ idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+
+ if (((short *)(eobs))[1] & 0xfefe)
+ idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+ else
+ idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+
+ q += 64;
+ dc += 4;
+ pre += 64;
+ dst += stride*4;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_y_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dst, int stride, char *eobs)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (((short *)(eobs))[0] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+
+ if (((short *)(eobs))[1] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+ else
+ idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+
+ q += 64;
+ pre += 64;
+ dst += stride*4;
+ eobs += 4;
+ }
+}
+
+void vp8_dequant_idct_add_uv_block_sse2
+ (short *q, short *dq, unsigned char *pre,
+ unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+ if (((short *)(eobs))[0] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+ q += 32;
+ pre += 32;
+ dstu += stride*4;
+
+ if (((short *)(eobs))[1] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+ q += 32;
+ pre += 32;
+
+ if (((short *)(eobs))[2] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+
+ q += 32;
+ pre += 32;
+ dstv += stride*4;
+
+ if (((short *)(eobs))[3] & 0xfefe)
+ idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+ else
+ idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+}
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 75a676a07..50293c792 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 6d7cc3666..47e346dd9 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -38,14 +39,24 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
#if CONFIG_RUNTIME_CPU_DETECT
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
-
if (flags & HAS_MMX)
{
- pbi->dequant.block = vp8_dequantize_b_mmx;
- pbi->dequant.idct = vp8_dequant_idct_mmx;
- pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx;
+ pbi->dequant.block = vp8_dequantize_b_mmx;
+ pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
+ }
+#endif
+#if HAVE_SSE2
+ if (flags & HAS_SSE2)
+ {
+ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
+ pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
+ pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
}
-
#endif
+
#endif
}
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
deleted file mode 100644
index cb2221c15..000000000
--- a/vp8/decoder/xprintf.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : xprintf.cpp
-*
-* Description : Display a printf style message on the current video frame.
-*
-****************************************************************************/
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-
-#include <stdio.h>
-#include <stdarg.h>
-#ifdef _WIN32_WCE
-#include <windows.h>
-#endif
-#include "xprintf.h"
-
-/****************************************************************************
- *
- * ROUTINE : xprintf
- *
- * INPUTS : const PB_INSTANCE *ppbi : Pointer to decoder instance.
- * long n_pixel : Offset into buffer to write text.
- * const char *format : Format string for print.
- * ... : Variable length argument list.
- *
- * OUTPUTS : None.
- *
- * RETURNS : int: Size (in bytes) of the formatted text.
- *
- * FUNCTION : Display a printf style message on the current video frame.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...)
-{
- BOOL b_rc;
- va_list arglist;
- HFONT hfont, hfonto;
-
- int rc = 0;
- char sz_formatted[256] = "";
- unsigned char *p_dest = &ppbuffer[n_pixel];
-
-#ifdef _WIN32_WCE
- // Set up temporary bitmap
- HDC hdc_memory = NULL;
- HBITMAP hbm_temp = NULL;
- HBITMAP hbm_orig = NULL;
-
- RECT rect;
-
- // Copy bitmap to video frame
- long x;
- long y;
-
- // Format text
- va_start(arglist, format);
- _vsnprintf(sz_formatted, sizeof(sz_formatted), format, arglist);
- va_end(arglist);
-
- rect.left = 0;
- rect.top = 0;
- rect.right = 8 * strlen(sz_formatted);
- rect.bottom = 8;
-
- hdc_memory = create_compatible_dc(NULL);
-
- if (hdc_memory == NULL)
- goto Exit;
-
- hbm_temp = create_bitmap(rect.right, rect.bottom, 1, 1, NULL);
-
- if (hbm_temp == NULL)
- goto Exit;
-
- hbm_orig = (HBITMAP)(select_object(hdc_memory, hbm_temp));
-
- if (!hbm_orig)
- goto Exit;
-
- // Write text into bitmap
- // font?
- hfont = create_font(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "");
-
- if (hfont == NULL)
- goto Exit;
-
- hfonto = (HFONT)(select_object(hdc_memory, hbm_temp));
-
- if (!hfonto)
- goto Exit;
-
- select_object(hdc_memory, hfont);
- set_text_color(hdc_memory, 1);
- set_bk_color(hdc_memory, 0);
- set_bk_mode(hdc_memory, TRANSPARENT);
-
- b_rc = bit_blt(hdc_memory, rect.left, rect.top, rect.right, rect.bottom, hdc_memory, rect.left, rect.top, BLACKNESS);
-
- if (!b_rc)
- goto Exit;
-
- b_rc = ext_text_out(hdc_memory, 0, 0, ETO_CLIPPED, &rect, sz_formatted, strlen(sz_formatted), NULL);
-
- if (!b_rc)
- goto Exit;
-
- for (y = rect.top; y < rect.bottom; ++y)
- {
- for (x = rect.left; x < rect.right; ++x)
- {
- if (get_pixel(hdc_memory, x, rect.bottom - 1 - y))
- p_dest[x] = 255;
- }
-
- p_dest += n_stride;
- }
-
- rc = strlen(sz_formatted);
-
-Exit:
-
- if (hbm_temp != NULL)
- {
- if (hbm_orig != NULL)
- {
- select_object(hdc_memory, hbm_orig);
- }
-
- delete_object(hbm_temp);
- }
-
- if (hfont != NULL)
- {
- if (hfonto != NULL)
- select_object(hdc_memory, hfonto);
-
- delete_object(hfont);
- }
-
- if (hdc_memory != NULL)
- delete_dc(hdc_memory);
-
- hdc_memory = 0;
-
-#endif
-
- return rc;
-}
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
deleted file mode 100644
index 2f175e943..000000000
--- a/vp8/decoder/xprintf.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : xprintf.h
-*
-* Description : Debug print interface header file.
-*
-****************************************************************************/
-#ifndef __INC_XPRINTF_H
-#define __INC_XPRINTF_H
-
-/****************************************************************************
-* Header Files
-****************************************************************************/
-
-/****************************************************************************
-* Functions
-****************************************************************************/
-
-// Display a printf style message on the current video frame
-extern int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...);
-
-#endif
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
new file mode 100644
index 000000000..a1f110260
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = cpi->common.rtcd.flags;
+ int has_edsp = flags & HAS_EDSP;
+ int has_media = flags & HAS_MEDIA;
+ int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+ if (has_media)
+ {
+ /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
+ cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
+ cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
+ cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
+ cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
+
+ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
+ cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
+ cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
+ cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
+ cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/
+
+ /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
+ cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
+ cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
+ cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
+ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/
+
+ /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
+ cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+
+ /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
+ cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
+ cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;
+ cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
+
+ /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
+ cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
+ cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/
+ cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
+
+ /*cpi->rtcd.encodemb.berr = vp8_block_error_c;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
+ cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
+ cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/
+
+ /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
+ }
+#endif
+
+#if HAVE_ARMV7
+ if (has_neon)
+ {
+ cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon;
+ cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon;
+ cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon;
+ cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon;
+ cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon;
+
+ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/
+ cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon;
+ cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon;
+ cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon;
+ cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon;
+
+ /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/
+ cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon;
+ /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
+ cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
+ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon;
+
+ cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
+ /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+
+ cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
+ /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
+ cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/
+ cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
+
+ cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon;
+ cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon;
+ cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon;
+ cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon;
+
+ /*cpi->rtcd.encodemb.berr = vp8_block_error_c;
+ cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
+ cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_neon;
+ cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon;
+ cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon;
+
+ /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
+ /* The neon quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ */
+ /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/
+ }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (has_neon)
+#endif
+ {
+ vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+ }
+#endif
+#endif
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 9a5f36661..e78dc3322 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -204,17 +205,10 @@ token_count_lt_zero_se
ldr r5, [r0, #vp8_writer_range]
ldr r3, [r0, #vp8_writer_count]
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r11, r1
rsb r4, r10, #32 ; 32-n
; v is kept in r1 during the token pack loop
- lsr r1, r11, r4 ; v >>= 32 - n
+ lsl r1, r1, r4 ; r1 = v << 32 - n
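+    ; v is left-aligned so that each lsls below shifts the current most
+    ; significant bit into the carry flag; the old rbit-based scheme
+    ; needed an instruction that ARMv5TE does not have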
encode_value_loop
sub r7, r5, #1 ; range-1
@@ -222,7 +216,7 @@ encode_value_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is referred to as "bb"
- lsrs r1, r1, #1 ; bit = v >> n
+ lsls r1, r1, #1 ; bit = v >> n
mov r4, r7, lsl #7 ; ((range-1) * 128)
mov r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index 9c52c52f6..3233d2a96 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -1,14 +1,15 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8cx_pack_tokens_armv7|
+ EXPORT |vp8cx_pack_tokens_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -24,7 +25,7 @@
; r3 vp8_coef_encodings
; s0 vp8_extra_bits
; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
push {r4-r11, lr}
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -56,18 +57,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #52] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -77,7 +71,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is refered to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -171,16 +165,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 92b098909..a9b552ae1 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -1,14 +1,15 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+ EXPORT |vp8cx_pack_mb_row_tokens_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -24,7 +25,7 @@
; r3 vp8_extra_bits
; s0 vp8_coef_tree
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
push {r4-r11, lr}
sub sp, sp, #24
@@ -77,18 +78,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #60] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -98,7 +92,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is referred to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -192,16 +186,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 6d5f882ed..0835164e5 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -1,14 +1,15 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+ EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
INCLUDE vpx_vp8_enc_asm_offsets.asm
@@ -26,7 +27,7 @@
; s1 vp8_extra_bits,
; s2 const vp8_tree_index *,
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
push {r4-r11, lr}
sub sp, sp, #44
@@ -105,18 +106,11 @@ while_p_lt_stop
movne lr, #2 ; i = 2
subne r8, r8, #1 ; --n
- ; reverse the stream of bits to be packed. Normally
- ; the most significant bit is peeled off and compared
- ; in the form of (v >> --n) & 1. ARM architecture has
- ; the ability to set a flag based on the value of the
- ; bit shifted off the bottom of the register. To make
- ; that happen the bitstream is reversed.
- rbit r12, r6
rsb r4, r8, #32 ; 32-n
ldr r10, [sp, #88] ; vp8_coef_tree
; v is kept in r12 during the token pack loop
- lsr r12, r12, r4 ; v >>= 32 - n
+ lsl r12, r6, r4 ; r12 = v << 32 - n
; loop start
token_loop
@@ -126,7 +120,7 @@ token_loop
; Decisions are made based on the bit value shifted
; off of v, so set a flag here based on this.
; This value is referred to as "bb"
- lsrs r12, r12, #1 ; bb = v >> n
+ lsls r12, r12, #1 ; bb = v >> n
mul r4, r4, r7 ; ((range-1) * pp[i>>1]))
; bb can only be 0 or 1. So only execute this statement
@@ -220,16 +214,15 @@ token_count_lt_zero
ldr r10, [r12, #vp8_extra_bit_struct_tree]
str r10, [sp, #4] ; b->tree
- rbit r12, r7 ; reverse v
rsb r4, r8, #32
- lsr r12, r12, r4
+ lsl r12, r7, r4
mov lr, #0 ; i = 0
extra_bits_loop
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
sub r7, r5, #1 ; range-1
- lsrs r12, r12, #1 ; v >> n
+ lsls r12, r12, #1 ; v >> n
mul r4, r4, r7 ; (range-1) * pp[i>>1]
addcs lr, lr, #1 ; i + bb
diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm
index 608c9ae65..61ffdb315 100644
--- a/vp8/encoder/arm/armv6/walsh_v6.asm
+++ b/vp8/encoder/arm/armv6/walsh_v6.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_short_walsh4x4_armv6|
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index e70b3ad47..fe8e70c16 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
deleted file mode 100644
index 003979680..000000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- cpi->rtcd.common = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_neon;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;
-#elif HAVE_ARMV6
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#else
- //pure c
- cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
- cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
- cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
- cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
-
- cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
- cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
- cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
- cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
- cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
-
- cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
- cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
- cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
- cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
- cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
-
- cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-
- cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
- cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
- cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;
- cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
-
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
-
- cpi->rtcd.encodemb.berr = vp8_block_error_c;
- cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
- cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
- cpi->rtcd.encodemb.subb = vp8_subtract_b_c;
- cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
- cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;
-
- cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
- vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
- vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
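
The file deleted above illustrates the idiom that replaces it: under CONFIG_RUNTIME_CPU_DETECT the encoder reaches every kernel through a function-pointer table filled once at startup, so an optimised routine can be selected without recompiling. A self-contained sketch of that dispatch idiom (all names hypothetical, not the real vp8 symbols):

    #include <stdlib.h>

    typedef int (*sad_fn)(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride);

    static int sad16x16_c(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride)
    {
        int r, c, sum = 0;

        for (r = 0; r < 16; r++)
            for (c = 0; c < 16; c++)
                sum += abs(src[r * src_stride + c] - ref[r * ref_stride + c]);

        return sum;
    }

    static sad_fn rtcd_sad16x16;          /* one slot of the dispatch table */

    static void rtcd_init(int have_neon)
    {
        /* a NEON kernel would be selected here when present */
        rtcd_sad16x16 = sad16x16_c;
        (void)have_neon;
    }
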
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index a671862fb..41fa5d192 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,9 +15,11 @@
#if HAVE_ARMV6
extern prototype_fdct(vp8_short_walsh4x4_armv6);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
#endif
+#endif
#if HAVE_ARMV7
extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -25,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
extern prototype_fdct(vp8_fast_fdct8x4_neon);
extern prototype_fdct(vp8_short_walsh4x4_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
@@ -39,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
#endif
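
The !CONFIG_RUNTIME_CPU_DETECT guards added in this hunk matter because the #define lines rebind the generic kernel names at compile time; left active alongside runtime detection, every call would be hard-wired to the ARM kernel and the function-pointer table filled at startup could never take effect. A condensed sketch of the two binding modes (the call-site spelling below is hypothetical, not the real invocation macro):

    #if HAVE_ARMV6 && !CONFIG_RUNTIME_CPU_DETECT
    #undef  vp8_fdct_walsh_short4x4
    #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6   /* static binding */
    #endif

    /* hypothetical call site */
    #if CONFIG_RUNTIME_CPU_DETECT
        cpi->rtcd.fdct.walsh_short4x4(input, output, pitch);   /* chosen at startup */
    #else
        vp8_fdct_walsh_short4x4(input, output, pitch);         /* bound above */
    #endif
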
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index 3f1d05391..cc9e014b2 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index 28f9e5c5f..8fe453735 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -29,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
//#undef vp8_encodemb_mbuverr
//#define vp8_encodemb_mbuverr vp8_mbuverror_c
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_encodemb_subb
#define vp8_encodemb_subb vp8_subtract_b_neon
@@ -37,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
#undef vp8_encodemb_submbuv
#define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
#endif
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
deleted file mode 100644
index 07f218605..000000000
--- a/vp8/encoder/arm/mcomp_arm.c
+++ /dev/null
@@ -1,1662 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "mcomp.h"
-#include "vpx_mem/vpx_mem.h"
-
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
-
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
-static int mv_bits_sadcost[256];
-
-extern unsigned int vp8_sub_pixel_variance16x16s_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-);
-
-void vp8cx_init_mv_bits_sadcost()
-{
- int i;
-
- for (i = 0; i < 256; i++)
- {
- mv_bits_sadcost[i] = (int)sqrt(i * 16);
- }
-}
-
-
-int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
-{
- // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
- // overstate the cost of vectors. In addition, coding a new vector can have a knock-on effect on the
- // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
- // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
- return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
-}
-
-int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
-{
- //int i;
- //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
- //return ( (vp8_mv_bit_cost(mv, ref, mvcost, 100) + 128) * error_per_bit) >> 8;
-
- //i = (vp8_mv_bit_cost(mv, ref, mvcost, 100) * error_per_bit + 128) >> 8;
- return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
- //return (vp8_mv_bit_cost(mv, ref, mvcost, 128) * error_per_bit + 128) >> 8;
-}
-
-
-static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
-{
- // get the estimated number of bits for a motion vector, to be used for costing in SAD-based
- // motion estimation
- return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
-}
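
All three helpers above follow the same fixed-point pattern: one table lookup per MV component, indexed at half-pel resolution by the >> 1, then a multiply by a scale factor and a shift back down (Q7 for Weight, Q8 with rounding for error_per_bit). A toy computation with hypothetical table values:

    #include <stdio.h>

    int main(void)
    {
        int row_cost = 180, col_cost = 120;       /* hypothetical mvcost lookups */
        int weight = 128, error_per_bit = 64;

        int bit_cost = ((row_cost + col_cost) * weight) >> 7;              /* 300 */
        int err_cost = ((row_cost + col_cost) * error_per_bit + 128) >> 8; /* 75 */

        printf("bit_cost=%d err_cost=%d\n", bit_cost, err_cost);
        return 0;
    }
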
-
-void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
-{
- int Len;
- int search_site_count = 0;
-
-
- // Generate offsets for 4 search sites per step.
- Len = MAX_FIRST_STEP;
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = 0;
- search_site_count++;
-
- while (Len > 0)
- {
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
- search_site_count++;
-
- // Contract.
- Len /= 2;
- }
-
- x->ss_count = search_site_count;
- x->searches_per_step = 4;
-}
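
Concretely, with a hypothetical stride of 32 and MAX_FIRST_STEP of 16, the loop above emits a cross of four sites at radius 16, then at radii 8, 4, 2 and 1, for 21 sites counting the centre, consumed four per step by the diamond search. A sketch that prints them:

    #include <stdio.h>

    int main(void)
    {
        int stride = 32, len = 16;        /* hypothetical values */
        int sites = 1;                    /* site 0 is the (0,0) centre */

        while (len > 0)
        {
            printf("(%3d,%3d) off=%6d\n", -len, 0, -len * stride);
            printf("(%3d,%3d) off=%6d\n",  len, 0,  len * stride);
            printf("(%3d,%3d) off=%6d\n", 0, -len, -len);
            printf("(%3d,%3d) off=%6d\n", 0,  len,  len);
            sites += 4;
            len /= 2;
        }

        printf("%d sites\n", sites);      /* 21 */
        return 0;
    }
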
-
-void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
-{
- int Len;
- int search_site_count = 0;
-
- // Generate offsets for 8 search sites per step.
- Len = MAX_FIRST_STEP;
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = 0;
- search_site_count++;
-
- while (Len > 0)
- {
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride - Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride + Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride - Len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride + Len;
- search_site_count++;
-
-
- // Contract.
- Len /= 2;
- }
-
- x->ss_count = search_site_count;
- x->searches_per_step = 8;
-}
-
-
-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motion vector
-#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns the subpixel variance error.
-#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
-#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
-#define MIN(x,y) (((x)<(y))?(x):(y))
-#define MAX(x,y) (((x)>(y))?(x):(y))
-
-//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
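
Expanded, one CHECK_BETTER(v, r, c) probe is a bounds test wrapped around a cost-plus-distortion evaluation; in plain C, with hypothetical helpers standing in for the MVC and DIST macros:

    /* hypothetical stand-ins for the MVC and DIST macros */
    extern int mv_cost(int r, int c);
    extern int subpel_variance(int r, int c);

    static void check_better(int r, int c, int minc, int maxc, int minr, int maxr,
                             unsigned int *besterr, int *br, int *bc)
    {
        if (c >= minc && c <= maxc && r >= minr && r <= maxr)
        {
            unsigned int v = mv_cost(r, c) + subpel_variance(r, c);

            if (v < *besterr)
            {
                *besterr = v;
                *br = r;
                *bc = c;
            }
        }
        /* out-of-range candidates are simply never selected */
    }
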
-
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
- unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
- unsigned char *z = (*(b->base_src) + b->src);
-
- int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
- int br = bestmv->row << 2, bc = bestmv->col << 2;
- int tr = br, tc = bc;
- unsigned int besterr = INT_MAX;
- unsigned int left, right, up, down, diag;
- unsigned int sse;
- unsigned int whichdir;
- unsigned int halfiters = 4;
- unsigned int quarteriters = 4;
-
- int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
- int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
- int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
- int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
-
- // central mv
- bestmv->row <<= 3;
- bestmv->col <<= 3;
-
- // calculate central point error
- besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
- besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
- // TODO: Each subsequent iteration checks at least one point in common with the last iteration (it could be two if the diagonal was selected)
- while (--halfiters)
- {
- // 1/2 pel
- CHECK_BETTER(left, tr, tc - 2);
- CHECK_BETTER(right, tr, tc + 2);
- CHECK_BETTER(up, tr - 2, tc);
- CHECK_BETTER(down, tr + 2, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir)
- {
- case 0:
- CHECK_BETTER(diag, tr - 2, tc - 2);
- break;
- case 1:
- CHECK_BETTER(diag, tr - 2, tc + 2);
- break;
- case 2:
- CHECK_BETTER(diag, tr + 2, tc - 2);
- break;
- case 3:
- CHECK_BETTER(diag, tr + 2, tc + 2);
- break;
- }
-
- // no reason to check the same one again.
- if (tr == br && tc == bc)
- break;
-
- tr = br;
- tc = bc;
- }
-
- // TODO: Each subsequent iteration checks at least one point in common with the last iteration (it could be two if the diagonal was selected)
- // 1/4 pel
- while (--quarteriters)
- {
- CHECK_BETTER(left, tr, tc - 1);
- CHECK_BETTER(right, tr, tc + 1);
- CHECK_BETTER(up, tr - 1, tc);
- CHECK_BETTER(down, tr + 1, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir)
- {
- case 0:
- CHECK_BETTER(diag, tr - 1, tc - 1);
- break;
- case 1:
- CHECK_BETTER(diag, tr - 1, tc + 1);
- break;
- case 2:
- CHECK_BETTER(diag, tr + 1, tc - 1);
- break;
- case 3:
- CHECK_BETTER(diag, tr + 1, tc + 1);
- break;
- }
-
- // no reason to check the same one again.
- if (tr == br && tc == bc)
- break;
-
- tr = br;
- tc = bc;
- }
-
- bestmv->row = br << 1;
- bestmv->col = bc << 1;
-
- if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
- return INT_MAX;
-
- return besterr;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-#undef MIN
-#undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
- int bestmse = INT_MAX;
- MV startmv;
- //MV this_mv;
- MV this_mv;
- unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
- unsigned char *z = (*(b->base_src) + b->src);
- int left, right, up, down, diag;
- unsigned int sse;
- int whichdir ;
-
-
- // Trap uncodable vectors
- if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
- {
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- return INT_MAX;
- }
-
- // central mv
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- startmv = *bestmv;
-
- // calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
- bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.row = startmv.row;
- this_mv.col = ((startmv.col - 8) | 4);
- left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 8;
- right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
- this_mv.row = ((startmv.row - 8) | 4);
- up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 8;
- down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- //for(whichdir =0;whichdir<4;whichdir++)
- //{
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 1:
- this_mv.col += 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 2:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row += 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- break;
- case 3:
- this_mv.col += 4;
- this_mv.row += 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-// }
-
-
- // time to check quarter pels.
- if (bestmv->row < startmv.row)
- y -= d->pre_stride;
-
- if (bestmv->col < startmv.col)
- y--;
-
- startmv = *bestmv;
-
-
-
- // go left then right and check error
- this_mv.row = startmv.row;
-
- if (startmv.col & 7)
- {
- this_mv.col = startmv.col - 2;
- left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
- }
-
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 4;
- right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
-
- if (startmv.row & 7)
- {
- this_mv.row = startmv.row - 2;
- up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
- up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
-
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 4;
- down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
-
- // now check 1 more diagonal
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-// for(whichdir=0;whichdir<4;whichdir++)
-// {
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
-
- if (startmv.row & 7)
- {
- this_mv.row -= 2;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
- }
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
- }
- }
-
- break;
- case 1:
- this_mv.col += 2;
-
- if (startmv.row & 7)
- {
- this_mv.row -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.row = (startmv.row - 8) | 6;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
- }
-
- break;
- case 2:
- this_mv.row += 2;
-
- if (startmv.col & 7)
- {
- this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- }
- else
- {
- this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
- }
-
- break;
- case 3:
- this_mv.col += 2;
- this_mv.row += 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-// }
-
- return bestmse;
-}
-
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
- int bestmse = INT_MAX;
- MV startmv;
- //MV this_mv;
- MV this_mv;
- unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
- unsigned char *z = (*(b->base_src) + b->src);
- int left, right, up, down, diag;
- unsigned int sse;
-
- // Trap uncodable vectors
- if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
- {
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- return INT_MAX;
- }
-
- // central mv
- bestmv->row <<= 3;
- bestmv->col <<= 3;
- startmv = *bestmv;
-
- // calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
- bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
- // go left then right and check error
- this_mv.row = startmv.row;
- this_mv.col = ((startmv.col - 8) | 4);
- left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (left < bestmse)
- {
- *bestmv = this_mv;
- bestmse = left;
- }
-
- this_mv.col += 8;
- right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
- right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (right < bestmse)
- {
- *bestmv = this_mv;
- bestmse = right;
- }
-
- // go up then down and check error
- this_mv.col = startmv.col;
- this_mv.row = ((startmv.row - 8) | 4);
- up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (up < bestmse)
- {
- *bestmv = this_mv;
- bestmse = up;
- }
-
- this_mv.row += 8;
- down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (down < bestmse)
- {
- *bestmv = this_mv;
- bestmse = down;
- }
-
- // Somewhat strangely, not doing all the diagonals for half pel is slower than doing them.
-#if 0
- // now check 1 more diagonal -
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- this_mv = startmv;
-
- switch (whichdir)
- {
- case 0:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 1:
- this_mv.col += 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 2:
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row += 4;
- diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- case 3:
- this_mv.col += 4;
- this_mv.row += 4;
- diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
- break;
- }
-
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-#else
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = (this_mv.row - 8) | 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col += 8;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col = (this_mv.col - 8) | 4;
- this_mv.row = startmv.row + 4;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
- this_mv.col += 8;
- diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
- diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
- if (diag < bestmse)
- {
- *bestmv = this_mv;
- bestmse = diag;
- }
-
-#endif
- return bestmse;
-}
-
-#if 1
-
-#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motion vector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-const MV next_chkpts[6][3] =
-{
- {{ -2, 0}, { -1, -2}, {1, -2}},
- {{ -1, -2}, {1, -2}, {2, 0}},
- {{1, -2}, {2, 0}, {1, 2}},
- {{2, 0}, {1, 2}, { -1, 2}},
- {{1, 2}, { -1, 2}, { -2, 0}},
- {{ -1, 2}, { -2, 0}, { -1, -2}}
-};
-int vp8_hex_search
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- MV *ref_mv,
- MV *best_mv,
- int search_param,
- int error_per_bit,
- int *num00,
- vp8_variance_fn_t vf,
- vp8_sad_fn_t sf,
- int *mvsadcost[2],
- int *mvcost[2]
-)
-{
- MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
- MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
- int i, j;
- unsigned char *src = (*(b->base_src) + b->src);
- int src_stride = b->src_stride;
- int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
- unsigned int besterr, thiserr = 0x7fffffff;
- int k = -1, tk;
-
- if (bc < x->mv_col_min) bc = x->mv_col_min;
-
- if (bc > x->mv_col_max) bc = x->mv_col_max;
-
- if (br < x->mv_row_min) br = x->mv_row_min;
-
- if (br > x->mv_row_max) br = x->mv_row_max;
-
- rr >>= 1;
- rc >>= 1;
-
- besterr = ERR(br, bc, thiserr);
-
- // hex search
- //j=0
- tr = br;
- tc = bc;
-
- for (i = 0; i < 6; i++)
- {
- int nr = tr + hex[i].row, nc = tc + hex[i].col;
-
- if (nc < x->mv_col_min) continue;
-
- if (nc > x->mv_col_max) continue;
-
- if (nr < x->mv_row_min) continue;
-
- if (nr > x->mv_row_max) continue;
-
- //CHECK_BETTER(thiserr,nr,nc);
- if ((thiserr = ERR(nr, nc, besterr)) < besterr)
- {
- besterr = thiserr;
- br = nr;
- bc = nc;
- k = i;
- }
- }
-
- if (tr == br && tc == bc)
- goto cal_neighbors;
-
- for (j = 1; j < 127; j++)
- {
- tr = br;
- tc = bc;
- tk = k;
-
- for (i = 0; i < 3; i++)
- {
- int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
-
- if (nc < x->mv_col_min) continue;
-
- if (nc > x->mv_col_max) continue;
-
- if (nr < x->mv_row_min) continue;
-
- if (nr > x->mv_row_max) continue;
-
- //CHECK_BETTER(thiserr,nr,nc);
- if ((thiserr = ERR(nr, nc, besterr)) < besterr)
- {
- besterr = thiserr;
- br = nr;
- bc = nc; //k=(tk+5+i)%6;}
- k = tk + 5 + i;
-
- if (k >= 12) k -= 12;
- else if (k >= 6) k -= 6;
- }
- }
-
- if (tr == br && tc == bc)
- break;
- }
-
- // check the 8 neighbors 1 pel away
-cal_neighbors:
- tr = br;
- tc = bc;
-
- for (i = 0; i < 8; i++)
- {
- int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
-
- if (nc < x->mv_col_min) continue;
-
- if (nc > x->mv_col_max) continue;
-
- if (nr < x->mv_row_min) continue;
-
- if (nr > x->mv_row_max) continue;
-
- CHECK_BETTER(thiserr, nr, nc);
- }
-
- best_mv->row = br;
- best_mv->col = bc;
-
- return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-
-#else
-
-#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motion vector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-
-int vp8_hex_search
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- MV *ref_mv,
- MV *best_mv,
- int search_param,
- int error_per_bit,
- int *num00,
- vp8_variance_fn_t vf,
- vp8_sad_fn_t sf,
- int *mvsadcost[2],
- int *mvcost[2]
-)
-{
- MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
- MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
- int i, j;
- unsigned char *src = (*(b->base_src) + b->src);
- int src_stride = b->src_stride;
- //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc;
- int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
- unsigned int besterr, thiserr = 0x7fffffff;
-
- /*
- if ( rc < x->mv_col_min) bc = x->mv_col_min;
- if ( rc > x->mv_col_max) bc = x->mv_col_max;
- if ( rr < x->mv_row_min) br = x->mv_row_min;
- if ( rr > x->mv_row_max) br = x->mv_row_max;
- rr>>=1;
- rc>>=1;
- br>>=3;
- bc>>=3;
- */
- if (bc < x->mv_col_min) bc = x->mv_col_min;
-
- if (bc > x->mv_col_max) bc = x->mv_col_max;
-
- if (br < x->mv_row_min) br = x->mv_row_min;
-
- if (br > x->mv_row_max) br = x->mv_row_max;
-
- rr >>= 1;
- rc >>= 1;
-
- besterr = ERR(br, bc, thiserr);
-
- // hex search: jbb changed the limit to 127 to avoid the max-256 problem when stepping by 2.
- for (j = 0; j < 127; j++)
- {
- tr = br;
- tc = bc;
-
- for (i = 0; i < 6; i++)
- {
- int nr = tr + hex[i].row, nc = tc + hex[i].col;
-
- if (nc < x->mv_col_min) continue;
-
- if (nc > x->mv_col_max) continue;
-
- if (nr < x->mv_row_min) continue;
-
- if (nr > x->mv_row_max) continue;
-
- CHECK_BETTER(thiserr, nr, nc);
- }
-
- if (tr == br && tc == bc)
- break;
- }
-
- // check the 8 neighbors 1 pel away
- tr = br;
- tc = bc;
-
- for (i = 0; i < 8; i++)
- {
- int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
-
- if (nc < x->mv_col_min) continue;
-
- if (nc > x->mv_col_max) continue;
-
- if (nr < x->mv_row_min) continue;
-
- if (nr > x->mv_row_max) continue;
-
- CHECK_BETTER(thiserr, nr, nc);
- }
-
- best_mv->row = br;
- best_mv->col = bc;
-
- return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-
-#endif
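
The wrap logic in the first (live) vp8_hex_search above is a branch-only modulo: after a move in hex direction tk, only the three points in next_chkpts[tk] are new (the other neighbours of the new centre were already scored), and the direction index becomes (tk + 5 + i) mod 6. A small check of that equivalence:

    #include <assert.h>

    int main(void)
    {
        int tk, i;

        for (tk = 0; tk < 6; tk++)
            for (i = 0; i < 3; i++)
            {
                int k = tk + 5 + i;       /* as in the deleted code */

                if (k >= 12)
                    k -= 12;
                else if (k >= 6)
                    k -= 6;

                assert(k == (tk + 5 + i) % 6);
            }

        return 0;
    }
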
-
-int vp8_diamond_search_sad
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- MV *ref_mv,
- MV *best_mv,
- int search_param,
- int error_per_bit,
- int *num00,
- vp8_variance_fn_ptr_t *fn_ptr,
- int *mvsadcost[2],
- int *mvcost[2]
-)
-{
- int i, j, step;
-
- unsigned char *what = (*(b->base_src) + b->src);
- int what_stride = b->src_stride;
- unsigned char *in_what;
- int in_what_stride = d->pre_stride;
- unsigned char *best_address;
-
- int tot_steps;
- MV this_mv;
-
- int bestsad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
-
- int ref_row = ref_mv->row >> 3;
- int ref_col = ref_mv->col >> 3;
- int this_row_offset;
- int this_col_offset;
- search_site *ss;
-
- unsigned char *check_here;
- int thissad;
-
- // Work out the start point for the search
- in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
- best_address = in_what;
-
- // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
- if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
- (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
- {
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
- }
-
- // search_param determines the length of the initial step and hence the number of iterations
- // 0 = initial step of MAX_FIRST_STEP pel; 1 = MAX_FIRST_STEP/2 pel; 2 = MAX_FIRST_STEP/4 pel; etc.
- ss = &x->ss[search_param * x->searches_per_step];
- tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
- i = 1;
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- *num00 = 0;
-
- for (step = 0; step < tot_steps ; step++)
- {
- for (j = 0 ; j < x->searches_per_step ; j++)
- {
- // Trap illegal vectors
- this_row_offset = best_mv->row + ss[i].mv.row;
- this_col_offset = best_mv->col + ss[i].mv.col;
-
- if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
- (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
-
- {
- check_here = ss[i].offset + best_address;
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
- if (thissad < bestsad)
- {
- this_mv.row = this_row_offset << 3;
- this_mv.col = this_col_offset << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_site = i;
- }
- }
- }
-
- i++;
- }
-
- if (best_site != last_site)
- {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- last_site = best_site;
- }
- else if (best_address == in_what)
- (*num00)++;
- }
-
- this_mv.row = best_mv->row << 3;
- this_mv.col = best_mv->col << 3;
-
- if (bestsad == INT_MAX)
- return INT_MAX;
-
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-}
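
The search_param comment above implies a simple relation: each increment halves the radius of the first diamond step and removes one step from tot_steps. A sketch under the assumption that MAX_FIRST_STEP is 16 (its real value is defined in mcomp.h):

    #define MAX_FIRST_STEP 16             /* assumed value for illustration */

    static int first_step_size(int search_param)
    {
        return MAX_FIRST_STEP >> search_param;   /* 16, 8, 4, ... pel */
    }
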
-
-int vp8_diamond_search_sadx4
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- MV *ref_mv,
- MV *best_mv,
- int search_param,
- int error_per_bit,
- int *num00,
- vp8_variance_fn_ptr_t *fn_ptr,
- int *mvsadcost[2],
- int *mvcost[2]
-)
-{
- int i, j, step;
-
- unsigned char *what = (*(b->base_src) + b->src);
- int what_stride = b->src_stride;
- unsigned char *in_what;
- int in_what_stride = d->pre_stride;
- unsigned char *best_address;
-
- int tot_steps;
- MV this_mv;
-
- int bestsad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
-
- int ref_row = ref_mv->row >> 3;
- int ref_col = ref_mv->col >> 3;
- int this_row_offset;
- int this_col_offset;
- search_site *ss;
-
- unsigned char *check_here;
- int thissad;
-
- // Work out the start point for the search
- in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
- best_address = in_what;
-
- // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
- if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
- (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
- {
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
- }
-
- // search_param determines the length of the initial step and hence the number of iterations
- // 0 = initial step of MAX_FIRST_STEP pel; 1 = MAX_FIRST_STEP/2 pel; 2 = MAX_FIRST_STEP/4 pel; etc.
- ss = &x->ss[search_param * x->searches_per_step];
- tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
- i = 1;
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- *num00 = 0;
-
- for (step = 0; step < tot_steps ; step++)
- {
- int check_row_min, check_col_min, check_row_max, check_col_max;
-
- check_row_min = x->mv_row_min - best_mv->row;
- check_row_max = x->mv_row_max - best_mv->row;
- check_col_min = x->mv_col_min - best_mv->col;
- check_col_max = x->mv_col_max - best_mv->col;
-
- for (j = 0 ; j < x->searches_per_step ; j += 4)
- {
- char *block_offset[4];
- unsigned int valid_block[4];
- int all_in = 1, t;
-
- for (t = 0; t < 4; t++)
- {
- valid_block [t] = (ss[t+i].mv.col > check_col_min);
- valid_block [t] &= (ss[t+i].mv.col < check_col_max);
- valid_block [t] &= (ss[t+i].mv.row > check_row_min);
- valid_block [t] &= (ss[t+i].mv.row < check_row_max);
-
- all_in &= valid_block[t];
- block_offset[t] = ss[i+t].offset + best_address;
- }
-
- if (all_in)
- {
- int sad_array[4];
-
- fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
-
- for (t = 0; t < 4; t++, i++)
- {
- thissad = sad_array[t];
-
- if (thissad < bestsad)
- {
- this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
- this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_site = i;
- }
- }
- }
- }
- else
- {
- int t;
-
- for (t = 0; t < 4; i++, t++)
- {
- // Trap illegal vectors
- if (valid_block[t])
-
- {
- check_here = block_offset[t];
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
- if (thissad < bestsad)
- {
- this_row_offset = best_mv->row + ss[i].mv.row;
- this_col_offset = best_mv->col + ss[i].mv.col;
-
- this_mv.row = this_row_offset << 3;
- this_mv.col = this_col_offset << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_site = i;
- }
- }
- }
- }
- }
- }
-
- if (best_site != last_site)
- {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- last_site = best_site;
- }
- else if (best_address == in_what)
- (*num00)++;
- }
-
- this_mv.row = best_mv->row << 3;
- this_mv.col = best_mv->col << 3;
-
- if (bestsad == INT_MAX)
- return INT_MAX;
-
- return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
- + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-}
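
The batched branch is profitable because all four candidate SADs share one source block, letting a SIMD kernel amortise the source loads. A scalar reference for the contract the sdx4df pointer is assumed to satisfy (a 16x16 block is assumed here):

    #include <stdlib.h>

    static void sad16x16x4d_ref(const unsigned char *src, int src_stride,
                                const unsigned char *const ref[4], int ref_stride,
                                int sad[4])
    {
        int k, r, c;

        for (k = 0; k < 4; k++)
        {
            int sum = 0;

            for (r = 0; r < 16; r++)
                for (c = 0; c < 16; c++)
                    sum += abs(src[r * src_stride + c] - ref[k][r * ref_stride + c]);

            sad[k] = sum;
        }
    }
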
-
-
-#if !(CONFIG_REALTIME_ONLY)
-int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
-{
- unsigned char *what = (*(b->base_src) + b->src);
- int what_stride = b->src_stride;
- unsigned char *in_what;
- int in_what_stride = d->pre_stride;
- int mv_stride = d->pre_stride;
- unsigned char *bestaddress;
- MV *best_mv = &d->bmi.mv.as_mv;
- MV this_mv;
- int bestsad = INT_MAX;
- int r, c;
-
- unsigned char *check_here;
- int thissad;
-
- int ref_row = ref_mv->row >> 3;
- int ref_col = ref_mv->col >> 3;
-
- int row_min = ref_row - distance;
- int row_max = ref_row + distance;
- int col_min = ref_col - distance;
- int col_max = ref_col + distance;
-
- // Work out the mid point for the search
- in_what = *(d->base_pre) + d->pre;
- bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
- if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
- (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
- {
- // Baseline value at the centre
-
- //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
- bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
- }
-
- // Apply further limits to prevent us from using vectors that stretch beyond the UMV border
- if (col_min < x->mv_col_min)
- col_min = x->mv_col_min;
-
- if (col_max > x->mv_col_max)
- col_max = x->mv_col_max;
-
- if (row_min < x->mv_row_min)
- row_min = x->mv_row_min;
-
- if (row_max > x->mv_row_max)
- row_max = x->mv_row_max;
-
- for (r = row_min; r < row_max ; r++)
- {
- this_mv.row = r << 3;
- check_here = r * mv_stride + in_what + col_min;
-
- for (c = col_min; c < col_max; c++)
- {
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
- this_mv.col = c << 3;
- //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
- //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
- bestaddress = check_here;
- }
-
- check_here++;
- }
- }
-
- this_mv.row = best_mv->row << 3;
- this_mv.col = best_mv->col << 3;
-
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
- + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
- else
- return INT_MAX;
-}
-
-int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
-{
- unsigned char *what = (*(b->base_src) + b->src);
- int what_stride = b->src_stride;
- unsigned char *in_what;
- int in_what_stride = d->pre_stride;
- int mv_stride = d->pre_stride;
- unsigned char *bestaddress;
- MV *best_mv = &d->bmi.mv.as_mv;
- MV this_mv;
- int bestsad = INT_MAX;
- int r, c;
-
- unsigned char *check_here;
- int thissad;
-
- int ref_row = ref_mv->row >> 3;
- int ref_col = ref_mv->col >> 3;
-
- int row_min = ref_row - distance;
- int row_max = ref_row + distance;
- int col_min = ref_col - distance;
- int col_max = ref_col + distance;
-
- int sad_array[3];
-
- // Work out the mid point for the search
- in_what = *(d->base_pre) + d->pre;
- bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
-
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
- if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
- (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
- {
- // Baseline value at the centre
- bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
- }
-
- // Apply further limits to prevent us from using vectors that stretch beyond the UMV border
- if (col_min < x->mv_col_min)
- col_min = x->mv_col_min;
-
- if (col_max > x->mv_col_max)
- col_max = x->mv_col_max;
-
- if (row_min < x->mv_row_min)
- row_min = x->mv_row_min;
-
- if (row_max > x->mv_row_max)
- row_max = x->mv_row_max;
-
- for (r = row_min; r < row_max ; r++)
- {
- this_mv.row = r << 3;
- check_here = r * mv_stride + in_what + col_min;
- c = col_min;
-
- while ((c + 3) < col_max)
- {
- int i;
-
- fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
-
- for (i = 0; i < 3; i++)
- {
- thissad = sad_array[i];
-
- if (thissad < bestsad)
- {
- this_mv.col = c << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
- bestaddress = check_here;
- }
- }
-
- check_here++;
- c++;
- }
- }
-
- while (c < col_max)
- {
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
- if (thissad < bestsad)
- {
- this_mv.col = c << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_mv->row = r;
- best_mv->col = c;
- bestaddress = check_here;
- }
- }
-
- check_here ++;
- c ++;
- }
-
- }
-
- this_mv.row = best_mv->row << 3;
- this_mv.col = best_mv->col << 3;
-
- if (bestsad < INT_MAX)
- return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
- + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
- else
- return INT_MAX;
-}
-#endif
-
-#ifdef ENTROPY_STATS
-void print_mode_context(void)
-{
- FILE *f = fopen("modecont.c", "w");
- int i, j;
-
- fprintf(f, "#include \"entropy.h\"\n");
- fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
- fprintf(f, "{\n");
-
- for (j = 0; j < 6; j++)
- {
- fprintf(f, " { // %d \n", j);
- fprintf(f, " ");
-
- for (i = 0; i < 4; i++)
- {
- int overal_prob;
- int this_prob;
- int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
-
- // Overall probs
- count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
-
- if (count)
- overal_prob = 256 * mv_mode_cts[i][0] / count;
- else
- overal_prob = 128;
-
- if (overal_prob == 0)
- overal_prob = 1;
-
- // context probs
- count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
-
- if (count)
- this_prob = 256 * mv_ref_ct[j][i][0] / count;
- else
- this_prob = 128;
-
- if (this_prob == 0)
- this_prob = 1;
-
- fprintf(f, "%5d, ", this_prob);
- //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
- //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
- }
-
- fprintf(f, " },\n");
- }
-
- fprintf(f, "};\n");
- fclose(f);
-}
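
The clamping in the function above keeps each emitted value a usable bool-coder probability: a zero would declare a branch impossible and corrupt decoding the first time it occurred. The per-entry computation in isolation (hypothetical helper; note the source clamps only the low end):

    static int branch_prob(int count0, int count1)
    {
        int total = count0 + count1;
        int prob = total ? (256 * count0) / total : 128;

        return prob ? prob : 1;           /* clamp away from zero */
    }
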
-
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
-void init_mv_ref_counts()
-{
- vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
- vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
-{
- if (m == ZEROMV)
- {
- ++mv_ref_ct [ct[0]] [0] [0];
- ++mv_mode_cts[0][0];
- }
- else
- {
- ++mv_ref_ct [ct[0]] [0] [1];
- ++mv_mode_cts[0][1];
-
- if (m == NEARESTMV)
- {
- ++mv_ref_ct [ct[1]] [1] [0];
- ++mv_mode_cts[1][0];
- }
- else
- {
- ++mv_ref_ct [ct[1]] [1] [1];
- ++mv_mode_cts[1][1];
-
- if (m == NEARMV)
- {
- ++mv_ref_ct [ct[2]] [2] [0];
- ++mv_mode_cts[2][0];
- }
- else
- {
- ++mv_ref_ct [ct[2]] [2] [1];
- ++mv_mode_cts[2][1];
-
- if (m == NEWMV)
- {
- ++mv_ref_ct [ct[3]] [3] [0];
- ++mv_mode_cts[3][0];
- }
- else
- {
- ++mv_ref_ct [ct[3]] [3] [1];
- ++mv_mode_cts[3][1];
- }
- }
- }
- }
-}
-
-#endif /* END MV ref count ENTROPY_STATS stats code */
-
-#endif
diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
index d5dec440d..8c191a753 100644
--- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
index de1c25469..ca351a1c4 100644
--- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 11070377b..ca1ea9c18 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm
index 6169f10da..d7c590e15 100644
--- a/vp8/encoder/arm/neon/sad16_neon.asm
+++ b/vp8/encoder/arm/neon/sad16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm
index 28604ddeb..23ba6df93 100644
--- a/vp8/encoder/arm/neon/sad8_neon.asm
+++ b/vp8/encoder/arm/neon/sad8_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 26bc0d06c..5af5cb888 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 8781ca0cc..3ea00f8b9 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index 64b83ca43..e1a46869a 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index f26b4d7ae..b0450e523 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index f53596727..6af4e87ba 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
index 5269c0af8..ba3decf6c 100644
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index aec716e3b..1b09cfe4c 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 3d02d7c40..0a2b71c49 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -1,16 +1,17 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp8_sub_pixel_variance16x16s_4_0_neon|
- EXPORT |vp8_sub_pixel_variance16x16s_0_4_neon|
- EXPORT |vp8_sub_pixel_variance16x16s_4_4_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_h_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_v_neon|
+ EXPORT |vp8_variance_halfpixvar16x16_hv_neon|
EXPORT |vp8_sub_pixel_variance16x16s_neon|
ARM
REQUIRE8
@@ -19,7 +20,7 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
+;unsigned int vp8_variance_halfpixvar16x16_h_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -28,7 +29,7 @@
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC
+|vp8_variance_halfpixvar16x16_h_neon| PROC
push {lr}
mov r12, #4 ;loop counter
@@ -119,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
ENDP
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_v_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -128,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC
+|vp8_variance_halfpixvar16x16_v_neon| PROC
push {lr}
mov r12, #4 ;loop counter
@@ -215,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
ENDP
;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_hv_neon
;(
; unsigned char *src_ptr, r0
; int src_pixels_per_line, r1
@@ -224,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
; unsigned int *sse
;);
;================================================
-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC
+|vp8_variance_halfpixvar16x16_hv_neon| PROC
push {lr}
vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data
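
[Note on this file's renames: the _4_0, _0_4 and _4_4 suffixes were the three half-pixel offsets (horizontal, vertical, diagonal) of the sub-pixel variance; they are now exposed under the generic vp8_variance_halfpixvar16x16_{h,v,hv} names so they can slot into the common variance function table. A minimal scalar sketch of what the horizontal variant computes, assuming vp8's usual (a + b + 1) >> 1 half-pel rounding; the helper name is illustrative, not from the tree:]

#include <stdint.h>

static unsigned int halfpixvar16x16_h_ref(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          unsigned int *sse)
{
    int i, j;
    int64_t sum = 0;
    uint32_t sq = 0;

    for (i = 0; i < 16; i++)
    {
        for (j = 0; j < 16; j++)
        {
            /* half-pel horizontal sample, (a + b + 1) >> 1 rounding;
             * reads one pixel past each 16-pixel row, as the filter does */
            int pred = (src[j] + src[j + 1] + 1) >> 1;
            int diff = pred - ref[j];
            sum += diff;
            sq  += (uint32_t)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
    }

    *sse = sq;
    /* variance = SSE - sum^2 / N, with N = 16 * 16 = 256 samples */
    return (unsigned int)(sq - (uint64_t)(sum * sum) / 256);
}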
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index bd56761fa..cf4da62fa 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index 0586e55d8..b2d8f2b2c 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 46906d3a2..65c616614 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -28,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
{
- d->eob = vp8_fast_quantize_b_neon_func(b->coeff, &b->zbin[0][0], d->qcoeff, d->dqcoeff, d->dequant[0], vp8_rvsplus1_default_zig_zag1d, &b->round[0][0], &b->quant[0][0]);
+ d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
}
/*
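
[The BLOCK quantizer tables change from short(*)[4] two-dimensional pointers to flat short * pointers in this patch (see the block.h hunk below), so the old &b->zbin[0][0]-style addressing collapses to plain b->zbin here. Both views alias the same sixteen shorts, as this small sketch shows:]

#include <assert.h>

/* Sketch: a [4][4] view and a flat [16] view share one memory layout,
 * so passing b->zbin is equivalent to the old &b->zbin[0][0]. */
static void layout_demo(void)
{
    short flat[16];
    short (*grid)[4] = (short (*)[4])flat;

    assert(&grid[0][0] == &flat[0]);
    assert(&grid[2][3] == &flat[2 * 4 + 3]);
}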
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index e93f0fef1..5f9155eb1 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,8 +15,11 @@
#if HAVE_ARMV7
extern prototype_quantize_block(vp8_fast_quantize_b_neon);
-#undef vp8_quantize_fastquantb
-#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
+/* The neon quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ */
+//#undef vp8_quantize_fastquantb
+//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
#endif
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index d9fc9b3e0..0e5f62fcf 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -29,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
//extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
//extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
//extern prototype_getmbss(vp8_get_mb_ss_c);
extern prototype_variance(vp8_mse16x16_neon);
@@ -37,6 +41,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
//extern prototype_variance2(vp8_get16x16var_c);
extern prototype_sad(vp8_get4x4sse_cs_neon);
+#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_sad4x4
#define vp8_variance_sad4x4 vp8_sad4x4_neon
@@ -82,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon
+
//#undef vp8_variance_getmbss
//#define vp8_variance_getmbss vp8_get_mb_ss_c
@@ -99,6 +113,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
#endif
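
[The new #if !CONFIG_RUNTIME_CPU_DETECT guard means these compile-time #define overrides apply only when NEON support is fixed at build time; with runtime detection enabled, a dispatch table is presumably filled in at initialization instead. A sketch of that pattern; only the _neon symbol name is taken from this patch, while the _c fallback, the table and the init function are assumptions, with the signature following vp8's prototype_variance shape:]

typedef unsigned int (*halfpixvar_fn)(const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *sse);

struct variance_vtable
{
    halfpixvar_fn halfpixvar16x16_h;
};

extern unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *, int, const unsigned char *, int, unsigned int *);
extern unsigned int vp8_variance_halfpixvar16x16_h_neon(
    const unsigned char *, int, const unsigned char *, int, unsigned int *);

static void init_variance_table(struct variance_vtable *t, int have_neon)
{
    /* patch the table at init time rather than via #defines */
    t->halfpixvar16x16_h = have_neon ? vp8_variance_halfpixvar16x16_h_neon
                                     : vp8_variance_halfpixvar16x16_h_c;
}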
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
index 8cdf0791f..c595ca3c0 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index e468f40f0..412542d10 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -791,7 +792,8 @@ static void write_mv_ref
assert(NEARESTMV <= m && m <= SPLITMV);
- vp8_write_token(w, vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+ vp8_write_token(w, vp8_mv_ref_tree, p,
+ vp8_mv_ref_encoding_array - NEARESTMV + m);
}
static void write_sub_mv_ref
@@ -801,7 +803,8 @@ static void write_sub_mv_ref
{
assert(LEFT4X4 <= m && m <= NEW4X4);
- vp8_write_token(w, vp8_sub_mv_ref_tree, p, VP8_SUBMVREFENCODINGS + m);
+ vp8_write_token(w, vp8_sub_mv_ref_tree, p,
+ vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
static void write_mv
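
[Both hunks above use the same indexing trick: vp8_mv_ref_encoding_array and vp8_sub_mv_ref_encoding_array hold one entry per inter (or sub-block) mode, but the mode enums do not start at zero, so the old macro form is replaced by explicit pointer arithmetic that biases the array base down by the first enum value and then indexes it with the raw mode. In miniature, with made-up enum values:]

/* (table - FIRST)[m] == table[m - FIRST]: biasing the base lets the
 * raw mode value index the table directly. */
enum mv_mode { NEARESTMV = 3, NEARMV, ZEROMV, NEWMV, SPLITMV };

static const int encodings[5] = { 10, 11, 12, 13, 14 }; /* one per mode */

static int encoding_for(enum mv_mode m)
{
    /* the intermediate pointer is out of range until m is added back,
     * which is exactly how the call sites above use it */
    return (encodings - NEARESTMV)[m];
}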
@@ -869,6 +872,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
int prob_skip_false = 0;
ms = pc->mi - 1;
+ cpi->mb.partition_info = cpi->mb.pi;
+
    // Calculate the probabilities to be used to code the reference frame based on actual usage this frame
if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
cpi->prob_intra_coded = 1;
@@ -1017,7 +1022,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
do
{
- const B_MODE_INFO *const b = mi->partition_bmi + j;
+ const B_MODE_INFO *const b = cpi->mb.partition_info->bmi + j;
const int *const L = vp8_mbsplits [mi->partitioning];
int k = -1; /* first block in subset j */
int mv_contz;
@@ -1039,7 +1044,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
write_mv(w, &b->mv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
}
}
- while (++j < mi->partition_count);
+ while (++j < cpi->mb.partition_info->count);
}
break;
default:
@@ -1048,9 +1053,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
}
++m;
+ cpi->mb.partition_info++;
}
++m; /* skip L prediction border */
+ cpi->mb.partition_info++;
}
}
@@ -1483,9 +1490,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
if (xd->mode_ref_lf_delta_enabled)
{
// Do the deltas need to be updated
- vp8_write_bit(bc, (xd->mode_ref_lf_delta_update) ? 1 : 0);
+ int send_update = xd->mode_ref_lf_delta_update
+ || cpi->oxcf.error_resilient_mode;
- if (xd->mode_ref_lf_delta_update)
+ vp8_write_bit(bc, send_update);
+ if (send_update)
{
int Data;
@@ -1495,8 +1504,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
Data = xd->ref_lf_deltas[i];
// Frame level data
- if (Data)
+ if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]
+ || cpi->oxcf.error_resilient_mode)
{
+ xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i];
vp8_write_bit(bc, 1);
if (Data > 0)
@@ -1520,8 +1531,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
{
Data = xd->mode_lf_deltas[i];
- if (Data)
+ if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]
+ || cpi->oxcf.error_resilient_mode)
{
+ xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i];
vp8_write_bit(bc, 1);
if (Data > 0)
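
[The loop filter delta signaling above changes from "send only when the encoder flagged an update" to "send whenever a delta differs from the last transmitted value, and always under error-resilient mode", with last_ref_lf_deltas/last_mode_lf_deltas tracking what the decoder currently holds. Per delta, the decision reduces to this shape (a sketch of the rule, not the project's code):]

/* Send-on-change rule for one loop filter delta value. */
static int need_delta_update(int cur, int *last, int error_resilient)
{
    if (cur != *last || error_resilient)
    {
        *last = cur;  /* record what the decoder will hold after this */
        return 1;     /* write a 1 bit plus the delta */
    }
    return 0;         /* write a 0 bit: decoder keeps its value */
}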
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index ee69f66e4..f5d148ea4 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -1,35 +1,36 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __INC_BITSTREAM_H
#define __INC_BITSTREAM_H
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
vp8_token *,
vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
- vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_tokens_into_partitions(a,b,c,d) \
- vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
# define pack_mb_row_tokens(a,b) \
- vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+ vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index cc4cbe067..e94e54976 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -31,10 +32,11 @@ typedef struct
short *coeff;
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
- short(*quant)[4];
- short(*zbin)[4];
- short(*zrun_zbin_boost);
- short(*round)[4];
+ short *quant;
+ short *quant_shift;
+ short *zbin;
+ short *zrun_zbin_boost;
+ short *round;
// Zbin Over Quant value
short zbin_extra;
@@ -50,6 +52,12 @@ typedef struct
typedef struct
{
+ int count;
+ B_MODE_INFO bmi[16];
+} PARTITION_INFO;
+
+typedef struct
+{
DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
@@ -59,6 +67,9 @@ typedef struct
YV12_BUFFER_CONFIG src;
MACROBLOCKD e_mbd;
+ PARTITION_INFO *partition_info; /* work pointer */
+ PARTITION_INFO *pi; /* Corresponds to upper left visible macroblock */
+ PARTITION_INFO *pip; /* Base of allocated array */
search_site *ss;
int ss_count;
@@ -91,6 +102,9 @@ typedef struct
int encode_breakout;
+ //char * gf_active_ptr;
+ signed char *gf_active_ptr;
+
unsigned char *active_ptr;
MV_CONTEXT *mvc;
@@ -99,15 +113,8 @@ typedef struct
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
- void (*short_fdct4x4rd)(short *input, short *output, int pitch);
- void (*short_fdct8x4rd)(short *input, short *output, int pitch);
- void (*vp8_short_fdct4x4_ptr)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);
-
void (*quantize_b)(BLOCK *b, BLOCKD *d);
- void (*quantize_brd)(BLOCK *b, BLOCKD *d);
-
-
} MACROBLOCK;
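
[The new PARTITION_INFO array moves the per-macroblock sub-partition modes out of MODE_INFO: pip is the allocation base, pi points at the upper-left visible macroblock, and partition_info is a work pointer advanced in lockstep with mode_info_context, one step per macroblock plus one extra step per row for the left prediction border (see the increments in encodeframe.c and pack_inter_mode_mvs in this patch). The walk order, sketched with abbreviated types:]

typedef struct { int count; /* B_MODE_INFO bmi[16]; */ } PARTITION_INFO_SK;

/* Walk pattern matching the increments elsewhere in this patch. */
static void partition_walk(PARTITION_INFO_SK *pi, int mb_rows, int mb_cols)
{
    PARTITION_INFO_SK *p = pi;  /* x->partition_info = x->pi; */
    int r, c;

    for (r = 0; r < mb_rows; r++)
    {
        for (c = 0; c < mb_cols; c++)
            p++;                /* advance with each macroblock */
        p++;                    /* skip the left prediction border slot */
    }
}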
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index c101384d9..82006b196 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 0d929f067..f723da3f0 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 5207e39c4..b5a11ae34 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -1,172 +1,64 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
-
-static const short dct_matrix2[4][4] =
-{
- { 23170, 30274, 23170, 12540 },
- { 23170, 12540, -23170, -30274 },
- { 23170, -12540, -23170, 30274 },
- { 23170, -30274, 23170, -12540 }
-};
-
-static const short dct_matrix1[4][4] =
-{
- { 23170, 23170, 23170, 23170 },
- { 30274, 12540, -12540, -30274 },
- { 23170, -23170, -23170, 23170 },
- { 12540, -30274, 30274, -12540 }
-};
-
-
-#define _1STSTAGESHIFT 14
-#define _1STSTAGEROUNDING (1<<( _1STSTAGESHIFT-1))
-#define _2NDSTAGESHIFT 16
-#define _2NDSTAGEROUNDING (1<<( _2NDSTAGESHIFT-1))
-
-// using matrix multiply
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
- int i, j, k;
- short temp[4][4];
- int sumtemp;
- pitch >>= 1;
-
- for (i = 0; i < 4; i++)
- {
- for (j = 0; j < 4; j++)
- {
- sumtemp = 0;
-
- for (k = 0; k < 4; k++)
- {
- sumtemp += input[i*pitch+k] * dct_matrix2[k][j];
-
- }
-
- temp[i][j] = (short)((sumtemp + _1STSTAGEROUNDING) >> _1STSTAGESHIFT);
- }
- }
-
-
- for (i = 0; i < 4; i++)
- {
- for (j = 0; j < 4; j++)
- {
- sumtemp = 0;
-
- for (k = 0; k < 4; k++)
- {
- sumtemp += dct_matrix1[i][ k] * temp[k][ j];
- }
-
- output[i*4+j] = (short)((sumtemp + _2NDSTAGEROUNDING) >> _2NDSTAGESHIFT);
- }
- }
-
-}
-
-
-void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
-{
- vp8_short_fdct4x4_c(input, output, pitch);
- vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
-
-
-static const signed short x_c1 = 60547;
-static const signed short x_c2 = 46341;
-static const signed short x_c3 = 25080;
-
-void vp8_fast_fdct4x4_c(short *input, short *output, int pitch)
-{
int i;
int a1, b1, c1, d1;
- int a2, b2, c2, d2;
short *ip = input;
-
short *op = output;
- int temp1, temp2;
for (i = 0; i < 4; i++)
{
- a1 = (ip[0] + ip[3]) * 2;
- b1 = (ip[1] + ip[2]) * 2;
- c1 = (ip[1] - ip[2]) * 2;
- d1 = (ip[0] - ip[3]) * 2;
-
- temp1 = a1 + b1;
- temp2 = a1 - b1;
-
- op[0] = ((temp1 * x_c2) >> 16) + temp1;
- op[2] = ((temp2 * x_c2) >> 16) + temp2;
-
- temp1 = (c1 * x_c3) >> 16;
- temp2 = ((d1 * x_c1) >> 16) + d1;
+ a1 = ((ip[0] + ip[3])<<3);
+ b1 = ((ip[1] + ip[2])<<3);
+ c1 = ((ip[1] - ip[2])<<3);
+ d1 = ((ip[0] - ip[3])<<3);
- op[1] = temp1 + temp2;
-
- temp1 = (d1 * x_c3) >> 16;
- temp2 = ((c1 * x_c1) >> 16) + c1;
+ op[0] = a1 + b1;
+ op[2] = a1 - b1;
- op[3] = temp1 - temp2;
+ op[1] = (c1 * 2217 + d1 * 5352 + 14500)>>12;
+ op[3] = (d1 * 2217 - c1 * 5352 + 7500)>>12;
ip += pitch / 2;
op += 4;
- }
+ }
ip = output;
op = output;
-
for (i = 0; i < 4; i++)
{
-
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
+ op[0] = ( a1 + b1 + 7)>>4;
+ op[8] = ( a1 - b1 + 7)>>4;
- temp1 = a1 + b1;
- temp2 = a1 - b1;
-
- a2 = ((temp1 * x_c2) >> 16) + temp1;
- c2 = ((temp2 * x_c2) >> 16) + temp2;
-
- temp1 = (c1 * x_c3) >> 16;
- temp2 = ((d1 * x_c1) >> 16) + d1;
-
- b2 = temp1 + temp2;
-
- temp1 = (d1 * x_c3) >> 16;
- temp2 = ((c1 * x_c1) >> 16) + c1;
-
- d2 = temp1 - temp2;
-
-
- op[0] = (a2 + 1) >> 1;
- op[4] = (b2 + 1) >> 1;
- op[8] = (c2 + 1) >> 1;
- op[12] = (d2 + 1) >> 1;
+ op[4] =((c1 * 2217 + d1 * 5352 + 12000)>>16) + (d1!=0);
+ op[12] = (d1 * 2217 - c1 * 5352 + 51000)>>16;
ip++;
op++;
}
}
-void vp8_fast_fdct8x4_c(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
{
- vp8_fast_fdct4x4_c(input, output, pitch);
- vp8_fast_fdct4x4_c(input + 4, output + 16, pitch);
+ vp8_short_fdct4x4_c(input, output, pitch);
+ vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}
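
[This hunk replaces both the matrix-multiply fdct and the separate "fast" approximation with a single integer transform. Inputs are pre-scaled by 8 (the << 3), the row pass rounds back 12 bits, and the column pass renormalizes with >> 4 and >> 16; the (d1 != 0) term nudges op[4] by one, presumably so the fixed-point inverse round-trips. The multipliers are the usual DCT rotation pair in Q12: 5352/4096 is close to sqrt(2)cos(pi/8) and 2217/4096 to sqrt(2)sin(pi/8), which this standalone check confirms (a sketch, not project code):]

#include <math.h>
#include <stdio.h>

int main(void)
{
    const double pi = 3.14159265358979323846;

    /* rotation constants of the 4-point DCT, scaled to Q12 */
    printf("%.1f\n", sqrt(2.0) * cos(pi / 8.0) * 4096.0); /* ~5351.7 -> 5352 */
    printf("%.1f\n", sqrt(2.0) * sin(pi / 8.0) * 4096.0); /* ~2216.7 -> 2217 */
    return 0;
}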
void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
@@ -177,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
short *ip = input;
short *op = output;
+
for (i = 0; i < 4; i++)
{
- a1 = ip[0] + ip[3];
- b1 = ip[1] + ip[2];
- c1 = ip[1] - ip[2];
- d1 = ip[0] - ip[3];
-
- op[0] = a1 + b1;
- op[1] = c1 + d1;
- op[2] = a1 - b1;
- op[3] = d1 - c1;
+ a1 = ((ip[0] + ip[2])<<2);
+ d1 = ((ip[1] + ip[3])<<2);
+ c1 = ((ip[1] - ip[3])<<2);
+ b1 = ((ip[0] - ip[2])<<2);
+
+ op[0] = a1 + d1 + (a1!=0);
+ op[1] = b1 + c1;
+ op[2] = b1 - c1;
+ op[3] = a1 - d1;
ip += pitch / 2;
op += 4;
}
@@ -197,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
for (i = 0; i < 4; i++)
{
- a1 = ip[0] + ip[12];
- b1 = ip[4] + ip[8];
- c1 = ip[4] - ip[8];
- d1 = ip[0] - ip[12];
-
- a2 = a1 + b1;
- b2 = c1 + d1;
- c2 = a1 - b1;
- d2 = d1 - c1;
-
- a2 += (a2 > 0);
- b2 += (b2 > 0);
- c2 += (c2 > 0);
- d2 += (d2 > 0);
-
- op[0] = (a2) >> 1;
- op[4] = (b2) >> 1;
- op[8] = (c2) >> 1;
- op[12] = (d2) >> 1;
+ a1 = ip[0] + ip[8];
+ d1 = ip[4] + ip[12];
+ c1 = ip[4] - ip[12];
+ b1 = ip[0] - ip[8];
+
+ a2 = a1 + d1;
+ b2 = b1 + c1;
+ c2 = b1 - c1;
+ d2 = a1 - d1;
+
+ a2 += a2<0;
+ b2 += b2<0;
+ c2 += c2<0;
+ d2 += d2<0;
+
+ op[0] = (a2+3) >> 3;
+ op[4] = (b2+3) >> 3;
+ op[8] = (c2+3) >> 3;
+ op[12]= (d2+3) >> 3;
ip++;
op++;
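
[The reworked Walsh-Hadamard transform likewise scales up front (<< 2) and rounds down at the end with (x + 3) >> 3; the (a1 != 0) term in the first pass biases the DC term by one, presumably to match the inverse transform's rounding. The x += x < 0 bias before the final shift makes that rounding symmetric about zero rather than biased toward minus infinity, which this standalone check illustrates:]

#include <stdio.h>

static int div8_round(int x)
{
    x += x < 0;          /* bias negative values by one */
    return (x + 3) >> 3; /* arithmetic shift, as in the transform */
}

int main(void)
{
    /* magnitudes round the same way on both sides of zero */
    printf("%d %d %d %d\n", div8_round(5), div8_round(-5),
                            div8_round(3), div8_round(-3)); /* 1 -1 0 0 */
    return 0;
}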
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index fb307cfb3..fec3b4c37 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -31,15 +32,14 @@ extern prototype_fdct(vp8_fdct_short4x4);
#endif
extern prototype_fdct(vp8_fdct_short8x4);
+// There is no fast4x4 (for now)
#ifndef vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_c
+#define vp8_fdct_fast4x4 vp8_short_fdct4x4_c
#endif
-extern prototype_fdct(vp8_fdct_fast4x4);
#ifndef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_c
+#define vp8_fdct_fast8x4 vp8_short_fdct8x4_c
#endif
-extern prototype_fdct(vp8_fdct_fast8x4);
#ifndef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_c
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index a4e377220..85e121be3 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -16,7 +17,7 @@
#include "extend.h"
#include "entropymode.h"
#include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
#include "setupintrarecon.h"
#include "encodeintra.h"
#include "reconinter.h"
@@ -59,10 +60,9 @@ unsigned int uv_modes[4] = {0, 0, 0, 0};
unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
-// The first four entries are dummy values
static const int qrounding_factors[129] =
{
- 56, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 56, 56, 48, 48, 56, 56,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
@@ -83,7 +83,7 @@ static const int qrounding_factors[129] =
static const int qzbin_factors[129] =
{
- 64, 64, 64, 64, 80, 80, 80, 80,
+ 72, 72, 72, 72, 80, 80, 72, 72,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
@@ -102,9 +102,64 @@ static const int qzbin_factors[129] =
80,
};
+static const int qrounding_factors_y2[129] =
+{
+ 56, 56, 56, 56, 48, 48, 56, 56,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48,
+};
+
+static const int qzbin_factors_y2[129] =
+{
+ 72, 72, 72, 72, 80, 80, 72, 72,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80, 80, 80, 80, 80, 80, 80, 80,
+ 80,
+};
+
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
+static void vp8cx_invert_quant(short *quant, short *shift, short d)
+{
+ unsigned t;
+ int l;
+ t = d;
+ for(l = 0; t > 1; l++)
+ t>>=1;
+ t = 1 + (1<<(16+l))/d;
+ *quant = (short)(t - (1<<16));
+ *shift = l;
+}
+
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
- int r, c;
int i;
int quant_val;
int Q;
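
[Under EXACT_QUANT, vp8cx_invert_quant replaces the old single (1 << 16) / quant_val reciprocal with a (multiplier, shift) pair: l = floor(log2(d)) and quant + 2^16 = 1 + floor(2^(16+l) / d), so one multiply, one add-back and two shifts reproduce exact division by the quantizer step d. A sketch of how such a pair would be consumed; the consuming quantizer is inferred from the algebra, not part of this hunk:]

/* q(x) = (((x * quant) >> 16) + x) >> shift
 * valid because ((x * quant) >> 16) + x == (x * (quant + 65536)) >> 16
 * for integer x, and quant + 65536 == 1 + floor(2^(16+l) / d). */
static int exact_quant_demo(int x, short quant, short shift)
{
    return (((x * quant) >> 16) + x) >> shift;
}

[For example, d = 4 gives l = 2 and quant = 1, so q(x) = ((x >> 16) + x) >> 2, which floors x / 4 for non-negative x.]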
@@ -115,63 +170,127 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
{
// dc values
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
- cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
- cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y1dequant[Q][0][0] = quant_val;
+ vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+ cpi->Y1quant_shift[Q] + 0, quant_val);
+ cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
- cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
- cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y2dequant[Q][0][0] = quant_val;
+ vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+ cpi->Y2quant_shift[Q] + 0, quant_val);
+ cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
- cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
- cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
- cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.UVdequant[Q][0][0] = quant_val;
+ vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+ cpi->UVquant_shift[Q] + 0, quant_val);
+ cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][0] = quant_val;
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
// all the ac values
for (i = 1; i < 16; i++)
{
int rc = vp8_default_zig_zag1d[i];
- r = (rc >> 2);
- c = (rc & 3);
quant_val = vp8_ac_yquant(Q);
- cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
- cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y1dequant[Q][r][c] = quant_val;
+ vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+ cpi->Y1quant_shift[Q] + rc, quant_val);
+ cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][rc] = quant_val;
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
- cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y2dequant[Q][r][c] = quant_val;
+ vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+ cpi->Y2quant_shift[Q] + rc, quant_val);
+ cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][rc] = quant_val;
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
- cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.UVdequant[Q][r][c] = quant_val;
+ vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+ cpi->UVquant_shift[Q] + rc, quant_val);
+ cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][rc] = quant_val;
cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
}
}
}
+#else
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+ int i;
+ int quant_val;
+ int Q;
+
+ int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+
+ for (Q = 0; Q < QINDEX_RANGE; Q++)
+ {
+ // dc values
+ quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+ cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
+ cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+ cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
+ cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+ cpi->UVquant[Q][0] = (1 << 16) / quant_val;
+ cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][0] = quant_val;
+ cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+ // all the ac values
+ for (i = 1; i < 16; i++)
+ {
+ int rc = vp8_default_zig_zag1d[i];
+ quant_val = vp8_ac_yquant(Q);
+ cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
+ cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][rc] = quant_val;
+ cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+ quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+ cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
+ cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][rc] = quant_val;
+ cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+ quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+ cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
+ cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][rc] = quant_val;
+ cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+ }
+ }
+}
+#endif
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
int i;
int QIndex;
MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mbmi;
int zbin_extra;
// Select the baseline MB Q index.
@@ -179,12 +298,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
// Abs Value
if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
- QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+ QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
// Delta Value
else
{
- QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+ QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range
}
}
@@ -192,11 +311,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
QIndex = cpi->common.base_qindex;
// Y
- zbin_extra = (cpi->common.Y1dequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+ zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
for (i = 0; i < 16; i++)
{
x->block[i].quant = cpi->Y1quant[QIndex];
+ x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -205,11 +325,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
}
// UV
- zbin_extra = (cpi->common.UVdequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+ zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
for (i = 16; i < 24; i++)
{
x->block[i].quant = cpi->UVquant[QIndex];
+ x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -218,8 +339,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
}
// Y2
- zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+ zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
x->block[24].quant = cpi->Y2quant[QIndex];
+ x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
x->block[24].round = cpi->Y2round[QIndex];
x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
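
[The zbin_extra computations above now index the flat dequant tables at [1], the first AC coefficient, scaling the boost in 1/128 units; this is the extra dead zone applied on top of the per-coefficient zbin. In isolation (the consuming comparison in the quantizer is an assumption):]

/* Extra dead zone derived from the first AC dequant step; a coefficient
 * is presumably quantized only when abs(coeff) >= zbin[rc] + zbin_extra. */
static int compute_zbin_extra(const short *dequant, int zbin_over_quant,
                              int zbin_mode_boost)
{
    return (dequant[1] * (zbin_over_quant + zbin_mode_boost)) >> 7;
}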
@@ -255,16 +377,15 @@ void encode_mb_row(VP8_COMP *cpi,
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
- int recon_y_stride = cm->last_frame.y_stride;
- int recon_uv_stride = cm->last_frame.uv_stride;
+ int ref_fb_idx = cm->lst_fb_idx;
+ int dst_fb_idx = cm->new_fb_idx;
+ int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int seg_map_index = (mb_row * cpi->common.mb_cols);
// reset above block coeffs
- xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
- xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
- xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
+ xd->above_context = cm->above_context;
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
@@ -273,25 +394,35 @@ void encode_mb_row(VP8_COMP *cpi,
cpi->tplist[mb_row].start = *tp;
//printf("Main mb_row = %d\n", mb_row);
+ // Distance of Mb to the top & bottom edges, specified in 1/8th pel
+ // units as they are always compared to values that are in 1/8th pel units
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+
+ // Set up limit values for vertical motion vector components
+ // to prevent them extending beyond the UMV borders
+ x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+ x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ + (VP8BORDERINPIXELS - 16);
+
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- // Distance of Mb to the various image edges.
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ // Distance of Mb to the left & right edges, specified in
+ // 1/8th pel units as they are always compared to values
+ // that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
- xd->mb_to_top_edge = -((mb_row * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
- // Set up limit values for motion vectors used to prevent them extending outside the UMV borders
+ // Set up limit values for horizontal motion vector components
+ // to prevent them extending beyond the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
- x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
- x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
- x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
+ x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ + (VP8BORDERINPIXELS - 16);
- xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+ xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
// Is segmentation enabled
@@ -300,14 +431,14 @@ void encode_mb_row(VP8_COMP *cpi,
{
// Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
- xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+ xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
else
- xd->mbmi.segment_id = 0;
+ xd->mode_info_context->mbmi.segment_id = 0;
vp8cx_mb_init_quantizer(cpi, x);
}
else
- xd->mbmi.segment_id = 0; // Set to Segment 0 by default
+ xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
@@ -331,14 +462,14 @@ void encode_mb_row(VP8_COMP *cpi,
for (b = 0; b < xd->mbmi.partition_count; b++)
{
- inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+ inter_b_modes[x->partition->bmi[b].mode] ++;
}
}
#endif
// Count of last ref frame 0,0 usage
- if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+ if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
// Special case code for cyclic refresh
@@ -346,14 +477,14 @@ void encode_mb_row(VP8_COMP *cpi,
// during vp8cx_encode_inter_macroblock()) back into the global segmentation map
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
{
- cpi->segmentation_map[seg_map_index+mb_col] = xd->mbmi.segment_id;
+ cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
// else mark it as dirty (1).
- if (xd->mbmi.segment_id)
+ if (xd->mode_info_context->mbmi.segment_id)
cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
- else if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+ else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
{
if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
@@ -366,10 +497,7 @@ void encode_mb_row(VP8_COMP *cpi,
cpi->tplist[mb_row].stop = *tp;
- xd->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
-
- // store macroblock mode info into context array
- vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
+ x->gf_active_ptr++; // Increment pointer into gf usage flags structure for next mb
for (i = 0; i < 16; i++)
vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
@@ -383,27 +511,26 @@ void encode_mb_row(VP8_COMP *cpi,
recon_uvoffset += 8;
// Keep track of segment usage
- segment_counts[xd->mbmi.segment_id] ++;
+ segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
// skip to next mb
xd->mode_info_context++;
+ x->partition_info++;
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
+ xd->above_context++;
cpi->current_mb_col_main = mb_col;
}
//extend the recon for intra prediction
vp8_extend_mb_row(
- &cm->new_frame,
+ &cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
// this is to account for the border
xd->mode_info_context++;
+ x->partition_info++;
}
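
[The four separately-strided above-context arrays (Y1/U/V/Y2) collapse into a single per-macroblock struct, so the per-MB bookkeeping becomes one pointer increment here and the per-frame reset becomes one memset later in this file. The removed per-plane increments imply a layout along these lines; the real ENTROPY_CONTEXT_PLANES is defined in common code, not shown in this patch:]

typedef char ENTROPY_CONTEXT_SK;

/* Hypothetical layout implied by the removed per-plane increments:
 * 4 Y1, 2 U, 2 V and 1 Y2 context per macroblock. */
typedef struct
{
    ENTROPY_CONTEXT_SK y1[4]; /* was above_context[Y1CONTEXT] += 4 */
    ENTROPY_CONTEXT_SK u[2];  /* was above_context[UCONTEXT]  += 2 */
    ENTROPY_CONTEXT_SK v[2];  /* was above_context[VCONTEXT]  += 2 */
    ENTROPY_CONTEXT_SK y2[1]; /* was above_context[Y2CONTEXT] += 1 */
} ENTROPY_CONTEXT_PLANES_SK;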
@@ -422,33 +549,31 @@ void vp8_encode_frame(VP8_COMP *cpi)
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
- if (cm->frame_type != KEY_FRAME)
+ // Functions setup for all frame types so we can use MC in AltRef
+ if (cm->mcomp_filter_type == SIXTAP)
{
- if (cm->mcomp_filter_type == SIXTAP)
- {
- xd->subpixel_predict = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap4x4);
- xd->subpixel_predict8x4 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x4);
- xd->subpixel_predict8x8 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x8);
- xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap16x16);
- }
- else
- {
- xd->subpixel_predict = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear4x4);
- xd->subpixel_predict8x4 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x4);
- xd->subpixel_predict8x8 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x8);
- xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear16x16);
- }
+ xd->subpixel_predict = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, sixtap4x4);
+ xd->subpixel_predict8x4 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, sixtap8x4);
+ xd->subpixel_predict8x8 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, sixtap8x8);
+ xd->subpixel_predict16x16 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, sixtap16x16);
+ }
+ else
+ {
+ xd->subpixel_predict = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, bilinear4x4);
+ xd->subpixel_predict8x4 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, bilinear8x4);
+ xd->subpixel_predict8x8 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, bilinear8x8);
+ xd->subpixel_predict16x16 = SUBPIX_INVOKE(
+ &cpi->common.rtcd.subpix, bilinear16x16);
}
- //else // Key Frame
- //{
- // For key frames make sure the intra ref frame probability value
- // is set to "all intra"
- //cpi->prob_intra_coded = 255;
- //}
-
-
- xd->gf_active_ptr = (signed char *)cm->gf_active_flags; // Point to base of GF active flags data structure
+ x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure
x->vector_range = 32;
@@ -467,13 +592,13 @@ void vp8_encode_frame(VP8_COMP *cpi)
#if 0
// Experimental code
- cpi->frame_distortion = 0;
+ cpi->frame_distortion = 0;
cpi->last_mb_distortion = 0;
#endif
totalrate = 0;
- xd->mode_info = cm->mi - 1;
+ x->partition_info = x->pi;
xd->mode_info_context = cm->mi;
xd->mode_info_stride = cm->mode_info_stride;
@@ -509,12 +634,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
// Copy data over into macro block data structures.
x->src = * cpi->Source;
- xd->pre = cm->last_frame;
- xd->dst = cm->new_frame;
+ xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up the new frame for intra coded blocks
- vp8_setup_intra_recon(&cm->new_frame);
+ vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
vp8_build_block_offsets(x);
@@ -539,10 +664,10 @@ void vp8_encode_frame(VP8_COMP *cpi)
//x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
#endif
- xd->mbmi.mode = DC_PRED;
- xd->mbmi.uv_mode = DC_PRED;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ xd->mode_info_context->mbmi.uv_mode = DC_PRED;
- xd->left_context = cm->left_context;
+ xd->left_context = &cm->left_context;
vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->ymode_count)
@@ -550,17 +675,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->mvc = cm->fc.mvc;
- // vp8_zero( entropy_stats)
- {
- ENTROPY_CONTEXT **p = cm->above_context;
- const size_t L = cm->mb_cols;
-
- vp8_zero_array(p [Y1CONTEXT], L * 4)
- vp8_zero_array(p [ UCONTEXT], L * 2)
- vp8_zero_array(p [ VCONTEXT], L * 2)
- vp8_zero_array(p [Y2CONTEXT], L)
- }
-
+ vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
{
struct vpx_usec_timer emr_timer;
@@ -619,6 +734,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+ x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
if (mb_row < cm->mb_rows - 1)
//WaitForSingleObject(cpi->h_event_main, INFINITE);
@@ -894,8 +1010,8 @@ void vp8_build_block_offsets(MACROBLOCK *x)
static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
const MACROBLOCKD *xd = & x->e_mbd;
- const MB_PREDICTION_MODE m = xd->mbmi.mode;
- const MB_PREDICTION_MODE uvm = xd->mbmi.uv_mode;
+ const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
+ const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
#ifdef MODE_STATS
const int is_key = cpi->common.frame_type == KEY_FRAME;
@@ -933,7 +1049,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
int rateuv_tokenonly = 0;
int i;
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
#if !(CONFIG_REALTIME_ONLY)
@@ -949,15 +1065,13 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
- x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
rate += rateuv;
if (Error4x4 < Error16x16)
{
rate += rate4x4;
- x->e_mbd.mbmi.mode = B_PRED;
+ x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
// get back the intra block modes
for (i = 0; i < 16; i++)
@@ -997,7 +1111,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
for (mode = DC_PRED; mode <= TM_PRED; mode ++)
{
- x->e_mbd.mbmi.mode = mode;
+ x->e_mbd.mode_info_context->mbmi.mode = mode;
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
@@ -1017,17 +1131,15 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
else
Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
- x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
if (Error4x4 < Error16x16)
{
- x->e_mbd.mbmi.mode = B_PRED;
+ x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
cpi->prediction_error += Error4x4;
}
else
{
- x->e_mbd.mbmi.mode = best_mode;
+ x->e_mbd.mode_info_context->mbmi.mode = best_mode;
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
cpi->prediction_error += Error16x16;
}
@@ -1044,7 +1156,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
extern int cnt_pm;
#endif
-extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+extern void vp8_fix_contexts(MACROBLOCKD *x);
int vp8cx_encode_inter_macroblock
(
@@ -1061,7 +1173,7 @@ int vp8cx_encode_inter_macroblock
x->skip = 0;
if (xd->segmentation_enabled)
- x->encode_breakout = cpi->segment_encode_breakout[xd->mbmi.segment_id];
+ x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
else
x->encode_breakout = cpi->oxcf.encode_breakout;
@@ -1092,17 +1204,17 @@ int vp8cx_encode_inter_macroblock
if (cpi->cyclic_refresh_mode_enabled)
{
// Clear segment_id back to 0 if not coded (last frame 0,0)
- if ((xd->mbmi.segment_id == 1) &&
- ((xd->mbmi.ref_frame != LAST_FRAME) || (xd->mbmi.mode != ZEROMV)))
+ if ((xd->mode_info_context->mbmi.segment_id == 1) &&
+ ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
{
- xd->mbmi.segment_id = 0;
+ xd->mode_info_context->mbmi.segment_id = 0;
}
}
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
if (cpi->zbin_mode_boost_enabled)
{
- if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame != LAST_FRAME))
+ if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
cpi->zbin_mode_boost = 0;
@@ -1111,15 +1223,13 @@ int vp8cx_encode_inter_macroblock
vp8cx_mb_init_quantizer(cpi, x);
}
- cpi->count_mb_ref_frame_usage[xd->mbmi.ref_frame] ++;
+ cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
- if (xd->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
- x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
- if (xd->mbmi.mode == B_PRED)
+ if (xd->mode_info_context->mbmi.mode == B_PRED)
{
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
}
@@ -1135,36 +1245,25 @@ int vp8cx_encode_inter_macroblock
MV best_ref_mv;
MV nearest, nearby;
int mdcounts[4];
+ int ref_fb_idx;
vp8_find_near_mvs(xd, xd->mode_info_context,
- &nearest, &nearby, &best_ref_mv, mdcounts, xd->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+ &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
vp8_build_uvmvs(xd, cpi->common.full_pixel);
- // store motion vectors in our motion vector list
- if (xd->mbmi.ref_frame == LAST_FRAME)
- {
- // Set up pointers for this macro block into the previous frame recon buffer
- xd->pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
- }
- else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
- {
- // Set up pointers for this macro block into the golden frame recon buffer
- xd->pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
- }
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = cpi->common.gld_fb_idx;
else
- {
- // Set up pointers for this macro block into the alternate reference frame recon buffer
- xd->pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
- }
+ ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
- if (xd->mbmi.mode == SPLITMV)
+ if (xd->mode_info_context->mbmi.mode == SPLITMV)
{
int i;
@@ -1177,19 +1276,19 @@ int vp8cx_encode_inter_macroblock
}
}
}
- else if (xd->mbmi.mode == NEWMV)
+ else if (xd->mode_info_context->mbmi.mode == NEWMV)
{
cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
}
- if (!x->skip && !x->e_mbd.mbmi.force_no_skip)
+ if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
{
vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
- xd->mbmi.mb_skip_coeff = 0;
+ xd->mode_info_context->mbmi.mb_skip_coeff = 0;
}
else
@@ -1202,19 +1301,19 @@ int vp8cx_encode_inter_macroblock
{
if (cpi->common.mb_no_coeff_skip)
{
- if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
- xd->mbmi.dc_diff = 0;
+ if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+ xd->mode_info_context->mbmi.dc_diff = 0;
else
- xd->mbmi.dc_diff = 1;
+ xd->mode_info_context->mbmi.dc_diff = 1;
- xd->mbmi.mb_skip_coeff = 1;
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
cpi->skip_true_count ++;
- vp8_fix_contexts(cpi, xd);
+ vp8_fix_contexts(xd);
}
else
{
vp8_stuff_mb(cpi, xd, t);
- xd->mbmi.mb_skip_coeff = 0;
+ xd->mode_info_context->mbmi.mb_skip_coeff = 0;
cpi->skip_false_count ++;
}
}
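
The hunks above retire the named reference buffers (last_frame, golden_frame, alt_ref_frame) in favour of a shared yv12_fb[] pool addressed through lst_fb_idx, gld_fb_idx, alt_fb_idx and new_fb_idx, so swapping references becomes an index assignment instead of a structure copy. A minimal sketch of the lookup pattern; the types and names YV12_SKETCH, CM_SKETCH, setup_pre and the enum values are illustrative, not from the tree:

    #include <stddef.h>

    typedef struct { unsigned char *y_buffer, *u_buffer, *v_buffer; } YV12_SKETCH;

    typedef struct {
        YV12_SKETCH yv12_fb[4];                 /* shared frame-buffer pool */
        int lst_fb_idx, gld_fb_idx, alt_fb_idx; /* pool slot per reference  */
    } CM_SKETCH;

    enum { LAST_SK = 1, GOLDEN_SK = 2, ALTREF_SK = 3 };

    /* Resolve the reference frame to a pool slot once, then offset all
     * three planes from the same buffer. */
    static void setup_pre(CM_SKETCH *cm, YV12_SKETCH *pre, int ref_frame,
                          size_t recon_yoffset, size_t recon_uvoffset)
    {
        int ref_fb_idx;

        if (ref_frame == LAST_SK)
            ref_fb_idx = cm->lst_fb_idx;
        else if (ref_frame == GOLDEN_SK)
            ref_fb_idx = cm->gld_fb_idx;
        else
            ref_fb_idx = cm->alt_fb_idx;

        pre->y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        pre->u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        pre->v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    }
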
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 403d0204a..1c72b90f1 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -52,8 +53,6 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK
x->quantize_b(be, b);
- x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
-
vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -65,11 +64,9 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
- x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
-
- x->quantize_brd(be, b);
+ x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
+ x->quantize_b(be, b);
IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
@@ -108,8 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
#if !(CONFIG_REALTIME_ONLY)
#if 1
-
- if (x->optimize && x->rddiv > 1)
+ if (x->optimize == 2 || (x->optimize && x->rddiv > 1))
vp8_optimize_mby(x, rtcd);
#endif
@@ -117,14 +113,15 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
- vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, recon_mby)
+ (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
// make sure block modes are set the way we want them for context updates
for (b = 0; b < 16; b++)
{
BLOCKD *d = &x->e_mbd.block[b];
- switch (x->e_mbd.mbmi.mode)
+ switch (x->e_mbd.mode_info_context->mbmi.mode)
{
case DC_PRED:
@@ -155,23 +152,21 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
- vp8_transform_intra_mbyrd(x);
-
- x->e_mbd.mbmi.mb_skip_coeff = 1;
-
- vp8_quantize_mbyrd(x);
+ vp8_transform_intra_mby(x);
+ vp8_quantize_mby(x);
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
- vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, recon_mby)
+ (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
// make sure block modes are set the way we want them for context updates
for (b = 0; b < 16; b++)
{
BLOCKD *d = &x->e_mbd.block[b];
- switch (x->e_mbd.mbmi.mode)
+ switch (x->e_mbd.mode_info_context->mbmi.mode)
{
case DC_PRED:
@@ -207,7 +202,7 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
#if !(CONFIG_REALTIME_ONLY)
#if 1
- if (x->optimize && x->rddiv > 1)
+ if (x->optimize == 2 || (x->optimize && x->rddiv > 1))
vp8_optimize_mbuv(x, rtcd);
#endif
@@ -224,11 +219,9 @@ void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
- vp8_transform_mbuvrd(x);
-
- vp8_quantize_mbuvrd(x);
-
+ vp8_transform_mbuv(x);
+ vp8_quantize_mbuv(x);
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
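
These hunks route reconstruction through RECON_INVOKE(&rtcd->common->recon, recon_mby) instead of calling vp8_recon16x16mby directly, matching the runtime-CPU-detect indirection already used for SUBPIX_INVOKE and ENCODEMB_INVOKE. A sketch of the usual INVOKE idiom under simplified, assumed definitions (the tree's actual macros and table fields may differ):

    typedef struct macroblockd_sk MACROBLOCKD_SK;       /* opaque here */
    typedef void (*recon_mby_fn_sk)(MACROBLOCKD_SK *xd);

    typedef struct {
        recon_mby_fn_sk recon_mby;   /* filled in once, at init, per CPU */
    } recon_rtcd_sk;

    void vp8_recon_mby_c_sk(MACROBLOCKD_SK *xd);        /* portable fallback */

    #if CONFIG_RUNTIME_CPU_DETECT
    /* dispatch through the table selected by CPU detection */
    #define RECON_INVOKE_SK(ctx, fn) (ctx)->fn
    #else
    /* resolve statically when the target CPU is fixed at build time */
    #define RECON_INVOKE_SK(ctx, fn) vp8_##fn##_c_sk
    #endif

    /* call site then mirrors the patch:
     *     RECON_INVOKE_SK(&rtcd, recon_mby)(xd);
     */
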
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index 4a43ab275..5be23d12b 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index bb43d3d5b..043eac219 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,6 +13,7 @@
#include "encodemb.h"
#include "reconinter.h"
#include "quantize.h"
+#include "tokenize.h"
#include "invtrans.h"
#include "recon.h"
#include "reconintra.h"
@@ -119,19 +121,11 @@ void vp8_transform_mbuv(MACROBLOCK *x)
for (i = 16; i < 24; i += 2)
{
- x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 16);
}
}
-void vp8_transform_mbuvrd(MACROBLOCK *x)
-{
- int i;
-
- for (i = 16; i < 24; i += 2)
- {
- x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
- }
-}
void vp8_transform_intra_mby(MACROBLOCK *x)
{
@@ -139,32 +133,19 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
- x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 32);
}
// build dc block from 16 y dc values
vp8_build_dcblock(x);
// do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+ x->short_walsh4x4(&x->block[24].src_diff[0],
+ &x->block[24].coeff[0], 8);
}
-void vp8_transform_intra_mbyrd(MACROBLOCK *x)
-{
- int i;
-
- for (i = 0; i < 16; i += 2)
- {
- x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
- }
-
- // build dc block from 16 y dc values
- vp8_build_dcblock(x);
-
- // do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
-}
void vp8_transform_mb(MACROBLOCK *x)
{
@@ -172,21 +153,24 @@ void vp8_transform_mb(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
- x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 32);
}
// build dc block from 16 y dc values
- if (x->e_mbd.mbmi.mode != SPLITMV)
+ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
vp8_build_dcblock(x);
for (i = 16; i < 24; i += 2)
{
- x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 16);
}
// do 2nd order transform on the dc block
- if (x->e_mbd.mbmi.mode != SPLITMV)
- x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+ x->short_walsh4x4(&x->block[24].src_diff[0],
+ &x->block[24].coeff[0], 8);
}
@@ -196,39 +180,19 @@ void vp8_transform_mby(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
- x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+ x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+ &x->block[i].coeff[0], 32);
}
// build dc block from 16 y dc values
- if (x->e_mbd.mbmi.mode != SPLITMV)
+ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
{
vp8_build_dcblock(x);
- x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+ x->short_walsh4x4(&x->block[24].src_diff[0],
+ &x->block[24].coeff[0], 8);
}
}
-void vp8_transform_mbrd(MACROBLOCK *x)
-{
- int i;
-
- for (i = 0; i < 16; i += 2)
- {
- x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
- }
-
- // build dc block from 16 y dc values
- if (x->e_mbd.mbmi.mode != SPLITMV)
- vp8_build_dcblock(x);
-
- for (i = 16; i < 24; i += 2)
- {
- x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
- }
-
- // do 2nd order transform on the dc block
- if (x->e_mbd.mbmi.mode != SPLITMV)
- x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
-}
void vp8_stuff_inter16x16(MACROBLOCK *x)
{
@@ -265,787 +229,394 @@ void vp8_stuff_inter16x16(MACROBLOCK *x)
}
#if !(CONFIG_REALTIME_ONLY)
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
-
-static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
-{
- int c = !type; /* start at coef 0, unless Y with Y2 */
- int eob = b->eob;
- int pt ; /* surrounding block/prev coef predictor */
- int cost = 0;
- short *qcoeff_ptr = b->qcoeff;
-
- VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
-# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
-
- for (; c < eob; c++)
- {
- int v = QC(c);
- int t = vp8_dct_value_tokens_ptr[v].Token;
- cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
- cost += vp8_dct_value_cost_ptr[v];
- pt = vp8_prev_token_class[t];
- }
+#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
+#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
-# undef QC
+typedef struct vp8_token_state vp8_token_state;
- if (c < 16)
- cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
+struct vp8_token_state {
+ int rate;
+ int error;
+ signed char next;
+ signed char token;
+ short qc;
+};
- return cost;
-}
+// TODO: experiment to find optimal multiplier values
+#define Y1_RD_MULT 1
+#define UV_RD_MULT 1
+#define Y2_RD_MULT 4
-static int mbycost_coeffs(MACROBLOCK *mb)
+static const int plane_rd_mult[4] =
{
- int cost = 0;
- int b;
- TEMP_CONTEXT t;
- int type = 0;
-
- MACROBLOCKD *x = &mb->e_mbd;
-
- vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
-
- if (x->mbmi.mode == SPLITMV)
- type = 3;
-
- for (b = 0; b < 16; b++)
- cost += cost_coeffs(mb, x->block + b, type,
- t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-
- return cost;
-}
-
-#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-
-void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
+ Y1_RD_MULT,
+ Y2_RD_MULT,
+ UV_RD_MULT,
+ Y1_RD_MULT
+};
+
+void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+ const VP8_ENCODER_RTCD *rtcd)
{
- BLOCK *b = &x->block[i];
- BLOCKD *bd = &x->e_mbd.block[i];
- short *dequant_ptr = &bd->dequant[0][0];
- int nzpos[16] = {0};
- short saved_qcoefs[16];
- short saved_dqcoefs[16];
- int baserate, baseerror, baserd;
- int rate, error, thisrd;
- int k;
- int nzcoefcount = 0;
- int nc, bestnc = 0;
- int besteob;
-
- // count potential coefficient to be optimized
- for (k = !type; k < 16; k++)
- {
- int qcoef = abs(bd->qcoeff[k]);
- int coef = abs(b->coeff[k]);
- int dq = dequant_ptr[k];
-
- if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
- {
- nzpos[nzcoefcount] = k;
- nzcoefcount++;
- }
- }
-
- // if nothing here, do nothing for this block.
- if (!nzcoefcount)
- {
- *a = *l = (bd->eob != !type);
- return;
- }
-
- // save a copy of quantized coefficients
- vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
- vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
- besteob = bd->eob;
- baserate = cost_coeffs(x, bd, type, a, l);
- baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
- baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
+ BLOCK *b;
+ BLOCKD *d;
+ vp8_token_state tokens[17][2];
+ unsigned best_mask[2];
+ const short *dequant_ptr;
+ const short *coeff_ptr;
+ short *qcoeff_ptr;
+ short *dqcoeff_ptr;
+ int eob;
+ int i0;
+ int rc;
+ int x;
+ int sz;
+ int next;
+ int path;
+ int rdmult;
+ int rddiv;
+ int final_eob;
+ int rd_cost0;
+ int rd_cost1;
+ int rate0;
+ int rate1;
+ int error0;
+ int error1;
+ int t0;
+ int t1;
+ int best;
+ int band;
+ int pt;
+ int i;
+ int err_mult = plane_rd_mult[type];
- for (nc = 1; nc < (1 << nzcoefcount); nc++)
- {
- //reset coefficients
- vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
- vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
+ b = &mb->block[ib];
+ d = &mb->e_mbd.block[ib];
- for (k = 0; k < nzcoefcount; k++)
- {
- int pos = nzpos[k];
+ /* Enable this to test the effect of RDO as a replacement for the dynamic
+ * zero bin instead of an augmentation of it.
+ */
+#if 0
+ vp8_strict_quantize_b(b, d);
+#endif
- if ((nc & (1 << k)))
+ dequant_ptr = d->dequant;
+ coeff_ptr = b->coeff;
+ qcoeff_ptr = d->qcoeff;
+ dqcoeff_ptr = d->dqcoeff;
+ i0 = !type;
+ eob = d->eob;
+
+ /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+ /* TODO: These should vary with the block type, since the quantizer does. */
+ rdmult = (mb->rdmult << 2)*err_mult;
+ rddiv = mb->rddiv;
+ best_mask[0] = best_mask[1] = 0;
+ /* Initialize the sentinel node of the trellis. */
+ tokens[eob][0].rate = 0;
+ tokens[eob][0].error = 0;
+ tokens[eob][0].next = 16;
+ tokens[eob][0].token = DCT_EOB_TOKEN;
+ tokens[eob][0].qc = 0;
+ *(tokens[eob] + 1) = *(tokens[eob] + 0);
+ next = eob;
+ for (i = eob; i-- > i0;)
+ {
+ int base_bits;
+ int d2;
+ int dx;
+
+ rc = vp8_default_zig_zag1d[i];
+ x = qcoeff_ptr[rc];
+ /* Only add a trellis state for non-zero coefficients. */
+ if (x)
+ {
+ int shortcut = 0;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ /* Evaluate the first possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ t0 = (vp8_dct_value_tokens_ptr + x)->Token;
+ /* Consider both possible successor states. */
+ if (next < 16)
{
- int cur_qcoef = bd->qcoeff[pos];
-
- if (cur_qcoef < 0)
- {
- bd->qcoeff[pos]++;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
- else
- {
- bd->qcoeff[pos]--;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
+ band = vp8_coef_bands[i + 1];
+ pt = vp8_prev_token_class[t0];
+ rate0 +=
+ mb->token_costs[type][band][pt][tokens[next][0].token];
+ rate1 +=
+ mb->token_costs[type][band][pt][tokens[next][1].token];
}
- }
-
- {
- int eob = -1;
- int rc;
- int m;
-
- for (m = 0; m < 16; m++)
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+ if (rd_cost0 == rd_cost1)
{
- rc = vp8_default_zig_zag1d[m];
-
- if (bd->qcoeff[rc])
- eob = m;
+ rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
}
-
- bd->eob = eob + 1;
- }
-
- rate = cost_coeffs(x, bd, type, a, l);
- error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
- thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
- if (thisrd < baserd)
- {
- baserd = thisrd;
- bestnc = nc;
- besteob = bd->eob;
- }
- }
-
- //reset coefficients
- vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
- vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
- if (bestnc)
- {
- for (k = 0; k < nzcoefcount; k++)
- {
- int pos = nzpos[k];
-
- if (bestnc & (1 << k))
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ base_bits = *(vp8_dct_value_cost_ptr + x);
+ dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+ d2 = dx*dx;
+ tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][0].error = d2 + (best ? error1 : error0);
+ tokens[i][0].next = next;
+ tokens[i][0].token = t0;
+ tokens[i][0].qc = x;
+ best_mask[0] |= best << i;
+ /* Evaluate the second possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
+ (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc]))
+ shortcut = 1;
+ else
+ shortcut = 0;
+
+ if (shortcut)
{
- int cur_qcoef = bd->qcoeff[pos];
-
- if (cur_qcoef < 0)
- {
- bd->qcoeff[pos]++;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
- else
- {
- bd->qcoeff[pos]--;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
+ sz = -(x < 0);
+ x -= 2*sz + 1;
}
- }
-
-#if 0
- {
- int eob = -1;
- int rc;
- int m;
- for (m = 0; m < 16; m++)
+ /* Consider both possible successor states. */
+ if (!x)
{
- rc = vp8_default_zig_zag1d[m];
-
- if (bd->qcoeff[rc])
- eob = m;
+ /* If we reduced this coefficient to zero, check to see if
+ * we need to move the EOB back here.
+ */
+ t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
+ DCT_EOB_TOKEN : ZERO_TOKEN;
+ t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
+ DCT_EOB_TOKEN : ZERO_TOKEN;
}
-
- bd->eob = eob + 1;
- }
-#endif
- }
-
-#if 1
- bd->eob = besteob;
-#endif
-#if 0
- {
- int eob = -1;
- int rc;
- int m;
-
- for (m = 0; m < 16; m++)
- {
- rc = vp8_default_zig_zag1d[m];
-
- if (bd->qcoeff[rc])
- eob = m;
- }
-
- bd->eob = eob + 1;
- }
-
-#endif
- *a = *l = (bd->eob != !type);
- return;
-}
-
-void vp8_optimize_bplus(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
- BLOCK *b = &x->block[i];
- BLOCKD *bd = &x->e_mbd.block[i];
- short *dequant_ptr = &bd->dequant[0][0];
- int nzpos[16] = {0};
- short saved_qcoefs[16];
- short saved_dqcoefs[16];
- int baserate, baseerror, baserd;
- int rate, error, thisrd;
- int k;
- int nzcoefcount = 0;
- int nc, bestnc = 0;
- int besteob;
-
- // count potential coefficient to be optimized
- for (k = !type; k < 16; k++)
- {
- int qcoef = abs(bd->qcoeff[k]);
- int coef = abs(b->coeff[k]);
- int dq = dequant_ptr[k];
-
- if (qcoef && (qcoef * dq < coef) && (coef < (qcoef * dq + dq)))
- {
- nzpos[nzcoefcount] = k;
- nzcoefcount++;
- }
- }
-
- // if nothing here, do nothing for this block.
- if (!nzcoefcount)
- {
- //do not update context, we need do the other half.
- //*a = *l = (bd->eob != !type);
- return;
- }
-
- // save a copy of quantized coefficients
- vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
- vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
- besteob = bd->eob;
- baserate = cost_coeffs(x, bd, type, a, l);
- baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
- baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
- for (nc = 1; nc < (1 << nzcoefcount); nc++)
- {
- //reset coefficients
- vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
- vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
- for (k = 0; k < nzcoefcount; k++)
- {
- int pos = nzpos[k];
-
- if ((nc & (1 << k)))
+ else
{
- int cur_qcoef = bd->qcoeff[pos];
-
- if (cur_qcoef < 0)
+ t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
+ }
+ if (next < 16)
+ {
+ band = vp8_coef_bands[i + 1];
+ if (t0 != DCT_EOB_TOKEN)
{
- bd->qcoeff[pos]--;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ pt = vp8_prev_token_class[t0];
+ rate0 += mb->token_costs[type][band][pt][
+ tokens[next][0].token];
}
- else
+ if (t1 != DCT_EOB_TOKEN)
{
- bd->qcoeff[pos]++;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
+ pt = vp8_prev_token_class[t1];
+ rate1 += mb->token_costs[type][band][pt][
+ tokens[next][1].token];
}
}
- }
-
- {
- int eob = -1;
- int rc;
- int m;
- for (m = 0; m < 16; m++)
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+ if (rd_cost0 == rd_cost1)
{
- rc = vp8_default_zig_zag1d[m];
-
- if (bd->qcoeff[rc])
- eob = m;
+ rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
}
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ base_bits = *(vp8_dct_value_cost_ptr + x);
- bd->eob = eob + 1;
- }
-
- rate = cost_coeffs(x, bd, type, a, l);
- error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
- thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
- if (thisrd < baserd)
- {
- baserd = thisrd;
- bestnc = nc;
- besteob = bd->eob;
- }
- }
-
- //reset coefficients
- vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
- vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
- if (bestnc)
- {
- for (k = 0; k < nzcoefcount; k++)
- {
- int pos = nzpos[k];
-
- if (bestnc & (1 << k))
+ if (shortcut)
{
- int cur_qcoef = bd->qcoeff[pos];
-
- if (cur_qcoef < 0)
- {
- bd->qcoeff[pos]++;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
- else
- {
- bd->qcoeff[pos]--;
- bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
- }
+ dx -= (dequant_ptr[rc] + sz) ^ sz;
+ d2 = dx*dx;
}
- }
- }
-
- bd->eob = besteob;
- //do not update context, we need do the other half.
- //*a = *l = (bd->eob != !type);
- return;
-}
-
-void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-
- BLOCK *b = &x->block[i];
- BLOCKD *bd = &x->e_mbd.block[i];
- short *dequant_ptr = &bd->dequant[0][0];
-
- int baserate, baseerror, baserd;
- int rate, error, thisrd;
- int k;
-
- if (bd->eob == 0)
- return;
-
- baserate = cost_coeffs(x, bd, type, a, l);
- baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
- baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
- for (k = 0; k < 16; k++)
- {
- int cur_qcoef = bd->qcoeff[k];
-
- if (!cur_qcoef)
- continue;
-
- if (cur_qcoef < 0)
- {
- bd->qcoeff[k]++;
- bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
- }
+ tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][1].error = d2 + (best ? error1 : error0);
+ tokens[i][1].next = next;
+ tokens[i][1].token = best ? t1 : t0;
+ tokens[i][1].qc = x;
+ best_mask[1] |= best << i;
+ /* Finally, make this the new head of the trellis. */
+ next = i;
+ }
+ /* There's no choice to make for a zero coefficient, so we don't
+ * add a new trellis node, but we do need to update the costs.
+ */
else
{
- bd->qcoeff[k]--;
- bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
- }
-
- if (bd->qcoeff[k] == 0)
- {
- int eob = -1;
- int rc;
- int l;
-
- for (l = 0; l < 16; l++)
+ band = vp8_coef_bands[i + 1];
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ /* Update the cost of each path if we're past the EOB token. */
+ if (t0 != DCT_EOB_TOKEN)
{
- rc = vp8_default_zig_zag1d[l];
-
- if (bd->qcoeff[rc])
- eob = l;
+ tokens[next][0].rate += mb->token_costs[type][band][0][t0];
+ tokens[next][0].token = ZERO_TOKEN;
}
-
- bd->eob = eob + 1;
- }
-
- rate = cost_coeffs(x, bd, type, a, l);
- error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
- thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
- if (thisrd > baserd)
- {
- bd->qcoeff[k] = cur_qcoef;
- bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k];
- }
- else
- {
- baserd = thisrd;
- }
-
- }
-
- {
- int eob = -1;
- int rc;
-
- for (k = 0; k < 16; k++)
- {
- rc = vp8_default_zig_zag1d[k];
-
- if (bd->qcoeff[rc])
- eob = k;
+ if (t1 != DCT_EOB_TOKEN)
+ {
+ tokens[next][1].rate += mb->token_costs[type][band][0][t1];
+ tokens[next][1].token = ZERO_TOKEN;
+ }
+ /* Don't update next, because we didn't add a new node. */
}
-
- bd->eob = eob + 1;
}
- return;
+ /* Now pick the best path through the whole trellis. */
+ band = vp8_coef_bands[i + 1];
+ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ rate0 += mb->token_costs[type][band][pt][t0];
+ rate1 += mb->token_costs[type][band][pt][t1];
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+ if (rd_cost0 == rd_cost1)
+ {
+ rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+ rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
+ }
+ best = rd_cost1 < rd_cost0;
+ final_eob = i0 - 1;
+ for (i = next; i < eob; i = next)
+ {
+ x = tokens[i][best].qc;
+ if (x)
+ final_eob = i;
+ rc = vp8_default_zig_zag1d[i];
+ qcoeff_ptr[rc] = x;
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc];
+ next = tokens[i][best].next;
+ best = (best_mask[best] >> i) & 1;
+ }
+ final_eob++;
+
+ d->eob = final_eob;
+ *a = *l = (d->eob != !type);
}
-
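
The rewritten vp8_optimize_b above drops the exhaustive 2^n search over per-coefficient rounding choices in favour of a Viterbi trellis: tokens[i][0] and tokens[i][1] hold, for each coefficient position, the best rate/error of keeping the quantized value versus pulling it one step toward zero, plane_rd_mult scales the error term per block type (Y2 weighted 4x), and RDCOST compares candidates with the rate carried in 8 fractional bits (the +128 rounds the >> 8). On an exact tie, RDTRUNC re-compares the fraction the shift discarded so the decision stays deterministic. A small standalone illustration of that comparison, using the macros verbatim but hypothetical rate/error values:

    #include <stdio.h>

    #define RDCOST(RM,DM,R,D)  ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
    #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )

    int main(void)
    {
        int rdmult = 300, rddiv = 1;   /* illustrative multipliers      */
        int rate0 = 40, error0 = 900;  /* candidate: keep coefficient   */
        int rate1 = 25, error1 = 930;  /* candidate: round toward zero  */
        int rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);  /* 947 */
        int rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);  /* 959 */

        /* Tie-break on the truncated fraction, exactly as the trellis does. */
        if (rd_cost0 == rd_cost1)
        {
            rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
            rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
        }
        printf("best = %d\n", rd_cost1 < rd_cost0);  /* 0: keep it */
        return 0;
    }
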
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
- TEMP_CONTEXT t, t2;
- int type = 0;
+ int type;
+ int has_2nd_order;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
+
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
- if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
- type = 3;
+ has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+ type = has_2nd_order ? 0 : 3;
for (b = 0; b < 16; b++)
{
- //vp8_optimize_bplus(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
- vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ vp8_optimize_b(x, b, type,
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
- vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
- vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
-
for (b = 16; b < 20; b++)
{
- //vp8_optimize_bplus(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
- vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
for (b = 20; b < 24; b++)
{
- //vp8_optimize_bplus(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
- vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
- }
-}
-
-
-
-void vp8_super_slow_yquant_optimization(MACROBLOCK *x, int type, const VP8_ENCODER_RTCD *rtcd)
-{
- BLOCK *b = &x->block[0];
- BLOCKD *bd = &x->e_mbd.block[0];
- short *dequant_ptr = &bd->dequant[0][0];
- struct
- {
- int block;
- int pos;
- } nzpos[256];
- short saved_qcoefs[256];
- short saved_dqcoefs[256];
- short *coef_ptr = x->coeff;
- short *qcoef_ptr = x->e_mbd.qcoeff;
- short *dqcoef_ptr = x->e_mbd.dqcoeff;
-
- int baserate, baseerror, baserd;
- int rate, error, thisrd;
- int i, k;
- int nzcoefcount = 0;
- int nc, bestnc = 0;
- int besteob;
-
- //this code has assumption in macroblock coeff buffer layout
- for (i = 0; i < 16; i++)
- {
- // count potential coefficient to be optimized
- for (k = !type; k < 16; k++)
- {
- int qcoef = abs(qcoef_ptr[i*16 + k]);
- int coef = abs(coef_ptr[i*16 + k]);
- int dq = dequant_ptr[k];
-
- if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
- {
- nzpos[nzcoefcount].block = i;
- nzpos[nzcoefcount].pos = k;
- nzcoefcount++;
- }
- }
- }
-
- // if nothing here, do nothing for this macro_block.
- if (!nzcoefcount || nzcoefcount > 15)
- {
- return;
- }
-
- /******************************************************************************
- looking from each coeffient's perspective, each identifed coefficent above could
- have 2 values:roundeddown(x) and roundedup(x). Therefore the total number of
- different states is less than 2**nzcoefcount.
- ******************************************************************************/
- // save the qunatized coefficents and dequantized coefficicents
- vpx_memcpy(saved_qcoefs, x->e_mbd.qcoeff, 256);
- vpx_memcpy(saved_dqcoefs, x->e_mbd.dqcoeff, 256);
-
- baserate = mbycost_coeffs(x);
- baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);
- baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
- for (nc = 1; nc < (1 << nzcoefcount); nc++)
- {
- //reset coefficients
- vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256);
- vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
- for (k = 0; k < nzcoefcount; k++)
- {
- int bk = nzpos[k].block;
- int pos = nzpos[k].pos;
- int mbkpos = bk * 16 + pos;
-
- if ((nc & (1 << k)))
- {
- int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
- if (cur_qcoef < 0)
- {
- x->e_mbd.qcoeff[mbkpos]++;
- x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
- }
- else
- {
- x->e_mbd.qcoeff[mbkpos]--;
- x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
- }
- }
- }
-
- for (i = 0; i < 16; i++)
- {
- BLOCKD *bd = &x->e_mbd.block[i];
- {
- int eob = -1;
- int rc;
- int l;
-
- for (l = 0; l < 16; l++)
- {
- rc = vp8_default_zig_zag1d[l];
-
- if (bd->qcoeff[rc])
- eob = l;
- }
-
- bd->eob = eob + 1;
- }
- }
-
- rate = mbycost_coeffs(x);
- error = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);;
- thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
- if (thisrd < baserd)
- {
- baserd = thisrd;
- bestnc = nc;
- besteob = bd->eob;
- }
- }
-
- //reset coefficients
- vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256);
- vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
- if (bestnc)
- {
- for (k = 0; k < nzcoefcount; k++)
- {
- int bk = nzpos[k].block;
- int pos = nzpos[k].pos;
- int mbkpos = bk * 16 + pos;
-
- if ((nc & (1 << k)))
- {
- int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
- if (cur_qcoef < 0)
- {
- x->e_mbd.qcoeff[mbkpos]++;
- x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
- }
- else
- {
- x->e_mbd.qcoeff[mbkpos]--;
- x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
- }
- }
- }
- }
-
- for (i = 0; i < 16; i++)
- {
- BLOCKD *bd = &x->e_mbd.block[i];
- {
- int eob = -1;
- int rc;
- int l;
-
- for (l = 0; l < 16; l++)
- {
- rc = vp8_default_zig_zag1d[l];
-
- if (bd->qcoeff[rc])
- eob = l;
- }
-
- bd->eob = eob + 1;
- }
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
- return;
-}
-static void vp8_find_mb_skip_coef(MACROBLOCK *x)
-{
- int i;
-
- x->e_mbd.mbmi.mb_skip_coeff = 1;
-
- if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
- {
- for (i = 0; i < 16; i++)
- {
- x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
- }
-
- for (i = 16; i < 25; i++)
- {
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
- }
- else
+ if (has_2nd_order)
{
- for (i = 0; i < 24; i++)
- {
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
+ b = 24;
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
}
-void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
- TEMP_CONTEXT t, t2;
- int type = 0;
+ int type;
+ int has_2nd_order;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
- vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+ if (!x->e_mbd.above_context)
+ return;
- if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
- type = 3;
+ if (!x->e_mbd.left_context)
+ return;
- vp8_super_slow_yquant_optimization(x, type, rtcd);
- /*
- for(b=0;b<16;b++)
- {
- vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
- }
- */
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
- for (b = 16; b < 20; b++)
- {
- vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
- }
-
- vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+ has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+ type = has_2nd_order ? 0 : 3;
- for (b = 20; b < 24; b++)
+ for (b = 0; b < 16; b++)
{
- vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+ vp8_optimize_b(x, b, type,
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
-}
-void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
-{
- int b;
- TEMP_CONTEXT t;
- int type = 0;
-
- if (!x->e_mbd.above_context[Y1CONTEXT])
- return;
-
- if (!x->e_mbd.left_context[Y1CONTEXT])
- return;
-
- vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
-
- if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
- type = 3;
-
- for (b = 0; b < 16; b++)
+ if (has_2nd_order)
{
- vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+ b = 24;
+ vp8_optimize_b(x, b, vp8_block2type[b],
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
-
}
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
int b;
- TEMP_CONTEXT t, t2;
-
- if (!x->e_mbd.above_context[UCONTEXT])
- return;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
- if (!x->e_mbd.left_context[UCONTEXT])
+ if (!x->e_mbd.above_context)
return;
- if (!x->e_mbd.above_context[VCONTEXT])
- return;
-
- if (!x->e_mbd.left_context[VCONTEXT])
+ if (!x->e_mbd.left_context)
return;
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
- vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 20; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
- t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
-
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
for (b = 20; b < 24; b++)
{
vp8_optimize_b(x, b, vp8_block2type[b],
- t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
}
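
All three optimize helpers above now snapshot the entropy contexts with a single memcpy of ENTROPY_CONTEXT_PLANES and index the copy linearly through vp8_block2above/vp8_block2left, replacing the per-plane Y1/U/V/Y2 TEMP_CONTEXT setup. A sketch of the packed layout this relies on; the struct fields and table values below are illustrative rather than quoted from the tree:

    typedef char ENTROPY_CONTEXT_SK;

    typedef struct {
        ENTROPY_CONTEXT_SK y1[4];  /* one per 4x4 luma column/row   */
        ENTROPY_CONTEXT_SK u[2];
        ENTROPY_CONTEXT_SK v[2];
        ENTROPY_CONTEXT_SK y2[1];  /* second-order DC block         */
    } ENTROPY_CONTEXT_PLANES_SK;   /* 9 contexts: one memcpy/memset */

    /* Blocks 0..15 are Y, 16..19 U, 20..23 V, 24 Y2; the lookup maps a
     * block index straight into the flat 9-entry view of the struct. */
    static const int block2above_sk[25] = {
        0, 1, 2, 3,  0, 1, 2, 3,  0, 1, 2, 3,  0, 1, 2, 3,  /* Y  */
        4, 5,  4, 5,                                        /* U  */
        6, 7,  6, 7,                                        /* V  */
        8                                                   /* Y2 */
    };
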
@@ -1062,20 +633,14 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mb(x);
#if !(CONFIG_REALTIME_ONLY)
-#if 1
-
- if (x->optimize && x->rddiv > 1)
- {
+ if (x->optimize == 2 || (x->optimize && x->rddiv > 1))
vp8_optimize_mb(x, rtcd);
- vp8_find_mb_skip_coef(x);
- }
-
-#endif
#endif
vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
- vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, recon_mb)
+ (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
@@ -1092,7 +657,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
- vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, recon_mby)
+ (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
@@ -1117,8 +683,8 @@ void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_build_inter_predictors_mbuv(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
- vp8_transform_mbuvrd(x);
+ vp8_transform_mbuv(x);
- vp8_quantize_mbuvrd(x);
+ vp8_quantize_mbuv(x);
}
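
Several hunks above widen the old "x->optimize && x->rddiv > 1" test so the trellis also runs when optimize is forced to 2, regardless of the RD divisor. The same condition restated as a hypothetical helper (should_trellis is not a name from the tree):

    /* Trellis coefficient optimization runs either when forced (2) or
     * when enabled together with a meaningful rate-distortion divisor. */
    static int should_trellis(int optimize, int rddiv)
    {
        return optimize == 2 || (optimize && rddiv > 1);
    }
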
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 91ca8f552..08f75c3b1 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -99,9 +100,7 @@ extern void vp8_stuff_inter16x16(MACROBLOCK *x);
void vp8_build_dcblock(MACROBLOCK *b);
void vp8_transform_mb(MACROBLOCK *mb);
void vp8_transform_mbuv(MACROBLOCK *x);
-void vp8_transform_mbuvrd(MACROBLOCK *x);
void vp8_transform_intra_mby(MACROBLOCK *x);
-void vp8_transform_intra_mbyrd(MACROBLOCK *x);
void Encode16x16Y(MACROBLOCK *x);
void Encode16x16UV(MACROBLOCK *x);
void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 2320b413a..cce753013 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -252,7 +253,7 @@ static void write_component_probs(
vp8_writer *const w,
struct mv_context *cur_mvc,
const struct mv_context *default_mvc_,
- const struct mv_context *update_mvc,
+ const struct mv_context *update_mvc,
const unsigned int events [MVvals],
unsigned int rc,
int *updated
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index 1c1f450a0..e4481bff0 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index a0b50d2a1..962e74174 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -27,7 +28,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
- ENTROPY_CONTEXT mb_row_left_context[4][4];
+ ENTROPY_CONTEXT_PLANES mb_row_left_context;
//printf("Started thread %d\n", ithread);
@@ -55,8 +56,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
- int recon_y_stride = cm->last_frame.y_stride;
- int recon_uv_stride = cm->last_frame.uv_stride;
+ int ref_fb_idx = cm->lst_fb_idx;
+ int dst_fb_idx = cm->new_fb_idx;
+ int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
volatile int *last_row_current_mb_col;
if (ithread > 0)
@@ -65,11 +68,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
last_row_current_mb_col = &cpi->current_mb_col_main;
// reset above block coeffs
- xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
- xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
- xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
- xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
- xd->left_context = mb_row_left_context;
+ xd->above_context = cm->above_context;
+ xd->left_context = &mb_row_left_context;
vp8_zero(mb_row_left_context);
@@ -106,9 +106,9 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
- xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+ xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
// Is segmentation enabled
@@ -117,14 +117,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
{
// Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
- xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+ xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
else
- xd->mbmi.segment_id = 0;
+ xd->mode_info_context->mbmi.segment_id = 0;
vp8cx_mb_init_quantizer(cpi, x);
}
else
- xd->mbmi.segment_id = 0; // Set to Segment 0 by default
+ xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
if (cm->frame_type == KEY_FRAME)
@@ -147,24 +147,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
for (b = 0; b < xd->mbmi.partition_count; b++)
{
- inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+ inter_b_modes[x->partition->bmi[b].mode] ++;
}
}
#endif
// Count of last ref frame 0,0 usage
- if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+ if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
}
cpi->tplist[mb_row].stop = *tp;
- xd->gf_active_ptr++; // Increment pointer into gf usage flags structure for next mb
-
- // store macroblock mode info into context array
- vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
+ x->gf_active_ptr++; // Increment pointer into gf usage flags structure for next mb
for (i = 0; i < 16; i++)
vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
@@ -178,15 +175,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
recon_uvoffset += 8;
// Keep track of segment usage
- segment_counts[xd->mbmi.segment_id] ++;
+ segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
// skip to next mb
xd->mode_info_context++;
+ x->partition_info++;
- xd->above_context[Y1CONTEXT] += 4;
- xd->above_context[UCONTEXT ] += 2;
- xd->above_context[VCONTEXT ] += 2;
- xd->above_context[Y2CONTEXT] ++;
+ xd->above_context++;
cpi->mb_row_ei[ithread].current_mb_col = mb_col;
@@ -194,19 +189,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
//extend the recon for intra prediction
vp8_extend_mb_row(
- &cm->new_frame,
+ &cm->yv12_fb[dst_fb_idx],
xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
// this is to account for the border
xd->mode_info_context++;
+ x->partition_info++;
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+ x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1)
{
@@ -256,13 +253,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4;
z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4;
- z->short_fdct4x4rd = x->short_fdct4x4rd;
- z->short_fdct8x4rd = x->short_fdct8x4rd;
- z->short_fdct8x4rd = x->short_fdct8x4rd;
- z->vp8_short_fdct4x4_ptr = x->vp8_short_fdct4x4_ptr;
z->short_walsh4x4 = x->short_walsh4x4;
z->quantize_b = x->quantize_b;
- z->quantize_brd = x->quantize_brd;
/*
z->mvc = x->mvc;
@@ -290,6 +282,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
for (i = 0; i < 25; i++)
{
z->block[i].quant = x->block[i].quant;
+ z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].round = x->block[i].round;
@@ -334,11 +327,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
- /*
- memcpy(zd->above_context, xd->above_context, sizeof(xd->above_context));
- memcpy(zd->mb_segment_tree_probs, xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs));
- memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
- */
for (i = 0; i < 25; i++)
{
zd->block[i].dequant = xd->block[i].dequant;
@@ -372,14 +360,15 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
#if CONFIG_RUNTIME_CPU_DETECT
mbd->rtcd = xd->rtcd;
#endif
- mbd->gf_active_ptr = xd->gf_active_ptr;
+ mb->gf_active_ptr = x->gf_active_ptr;
mb->vector_range = 32;
vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
mbr_ei[i].totalrate = 0;
- mbd->mode_info = cm->mi - 1;
+ mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
+
mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1);
mbd->mode_info_stride = cm->mode_info_stride;
@@ -389,8 +378,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mbd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
mb->src = * cpi->Source;
- mbd->pre = cm->last_frame;
- mbd->dst = cm->new_frame;
+ mbd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ mbd->dst = cm->yv12_fb[cm->new_fb_idx];
mb->src.y_buffer += 16 * x->src.y_stride * (i + 1);
mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1);
@@ -406,10 +395,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->rddiv = cpi->RDDIV;
mb->rdmult = cpi->RDMULT;
- mbd->mbmi.mode = DC_PRED;
- mbd->mbmi.uv_mode = DC_PRED;
-
- mbd->left_context = cm->left_context;
+ mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;
setup_mbby_copy(&mbr_ei[i].mb, x);
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index d5d430906..8a94fa369 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -29,7 +30,6 @@
#include "encodemv.h"
//#define OUTPUT_FPF 1
-//#define FIRSTPASS_MM 1
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
@@ -77,9 +77,9 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
if (use_dc_pred)
{
- x->e_mbd.mbmi.mode = DC_PRED;
- x->e_mbd.mbmi.uv_mode = DC_PRED;
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
}
@@ -107,15 +107,6 @@ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
{
- /*FIRSTPASS_STATS * start_pos;
- int ret_val;
-
- start_pos = cpi->stats_in;
- ret_val = vp8_input_stats(cpi, next_frame);
- reset_fpf_position(cpi, start_pos);
-
- return ret_val;*/
-
if (cpi->stats_in >= cpi->stats_in_end)
return EOF;
@@ -126,7 +117,7 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
// Calculate a modified Error used in distributing bits between easier and harder frames
static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
{
- double av_err = cpi->total_stats.ssim_weighted_pred_err;
+ double av_err = cpi->total_stats->ssim_weighted_pred_err;
double this_err = this_frame->ssim_weighted_pred_err;
double modified_err;
@@ -216,7 +207,7 @@ int frame_max_bits(VP8_COMP *cpi)
// If we are running below the optimal level then we need to gradually tighten up on max_bits.
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- double buffer_fullness_ratio = (double)DOUBLE_DIVIDE_CHECK(cpi->buffer_level) / (double)cpi->oxcf.optimal_buffer_level;
+ double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
// For CBR, base this on the target average bits per frame plus the maximum section rate passed in by the user
max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
@@ -237,7 +228,7 @@ int frame_max_bits(VP8_COMP *cpi)
else
{
// For VBR, base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
- max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+ max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
}
// Trap case where we are out of bits
@@ -247,17 +238,35 @@ int frame_max_bits(VP8_COMP *cpi)
return max_bits;
}
-void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
+
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
+{
+ /* Calculate the size of a stats packet, which is dependent on the frame
+ * resolution. The FIRSTPASS_STATS struct has a single element array,
+ * motion_map, which is virtually expanded to have one element per
+ * macroblock.
+ */
+ size_t stats_sz;
+ FIRSTPASS_STATS stats;
+
+ stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
+ stats_sz = (stats_sz + 7) & ~7;
+ return stats_sz;
+}
+
+
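The rounding above keeps every packet a multiple of 8 bytes, so back-to-back packets in the stats buffer stay aligned whatever the frame size. A minimal sketch of the same arithmetic, assuming a hypothetical stats_hdr_sz in place of sizeof(FIRSTPASS_STATS):

#include <stddef.h>

/* Sketch of vp8_firstpass_stats_sz(): one motion-map byte per macroblock is
 * appended to the header, then the total is rounded up to a multiple of 8. */
static size_t stats_packet_sz(size_t stats_hdr_sz, unsigned int mb_count)
{
    size_t sz = stats_hdr_sz + mb_count;   /* header + 1 map byte per MB */
    return (sz + 7) & ~(size_t)7;          /* e.g. 101 -> 104, 104 -> 104 */
}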
+void vp8_output_stats(const VP8_COMP *cpi,
+ struct vpx_codec_pkt_list *pktlist,
FIRSTPASS_STATS *stats)
{
struct vpx_codec_cx_pkt pkt;
pkt.kind = VPX_CODEC_STATS_PKT;
pkt.data.twopass_stats.buf = stats;
- pkt.data.twopass_stats.sz = sizeof(*stats);
+ pkt.data.twopass_stats.sz = vp8_firstpass_stats_sz(cpi->common.MBs);
vpx_codec_pkt_list_add(pktlist, &pkt);
// TEMP debug code
-#ifdef OUTPUT_FPF
+#if OUTPUT_FPF
{
FILE *fpfile;
fpfile = fopen("firstpass.stt", "a");
@@ -279,16 +288,24 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
stats->mv_in_out_count,
stats->count);
fclose(fpfile);
+
+
+ fpfile = fopen("fpmotionmap.stt", "a");
+ if(fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile));
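+        /* the empty if() body above is deliberate: wrapping fwrite() this way
+         * only consumes its return value, presumably to quiet warn_unused_result */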
+ fclose(fpfile);
}
#endif
}
int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
{
+ size_t stats_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+
if (cpi->stats_in >= cpi->stats_in_end)
return EOF;
- *fps = *cpi->stats_in++;
+ *fps = *cpi->stats_in;
+ cpi->stats_in = (void*)((char *)cpi->stats_in + stats_sz);
return 1;
}
@@ -351,76 +368,47 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)
section->duration /= section->count;
}
-int vp8_fpmm_get_pos(VP8_COMP *cpi)
+unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
{
- return ftell(cpi->fp_motion_mapfile);
+ return cpi->fp_motion_map_stats;
}
-void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos)
+void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
{
-    int Offset;
- if (cpi->fp_motion_mapfile)
- {
- Offset = ftell(cpi->fp_motion_mapfile) - target_pos;
- fseek(cpi->fp_motion_mapfile, (int) - Offset, SEEK_CUR);
- }
+ cpi->fp_motion_map_stats = target_pos;
}
void vp8_advance_fpmm(VP8_COMP *cpi, int count)
{
-#ifdef FIRSTPASS_MM
- fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR);
-#endif
+ cpi->fp_motion_map_stats = (void*)((char*)cpi->fp_motion_map_stats +
+ count * vp8_firstpass_stats_sz(cpi->common.MBs));
}
-void vp8_input_fpmm(VP8_COMP *cpi, int count)
+void vp8_input_fpmm(VP8_COMP *cpi)
{
-#ifdef FIRSTPASS_MM
-
- unsigned char *tmp_motion_map;
- int i, j;
-
- if (!cpi->fp_motion_mapfile)
- return; // Error
-
- // Create the first pass motion map structure and set to 0
- CHECK_MEM_ERROR(tmp_motion_map, vpx_calloc(cpi->common.MBs, 1));
-
- // Reset the state of the global map
- vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
+ unsigned char *fpmm = cpi->fp_motion_map;
+ int MBs = cpi->common.MBs;
+ int max_frames = cpi->active_arnr_frames;
+ int i;
- // Read the specified number of frame maps and set the global map to the highest value seen for each mb.
- for (i = 0; i < count; i++)
+ for (i=0; i<max_frames; i++)
{
- if (fread(tmp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile) == cpi->common.MBs)
- {
- for (j = 0; j < cpi->common.MBs; j++)
- {
- if (tmp_motion_map[j] > 1)
- cpi->fp_motion_map[j] += 5; // Intra is flagged
- else
- cpi->fp_motion_map[j] += tmp_motion_map[j];
- }
- }
- else
- break; // Read error
+ char *motion_map = (char*)cpi->fp_motion_map_stats
+ + sizeof(FIRSTPASS_STATS);
+ memcpy(fpmm, motion_map, MBs);
+ fpmm += MBs;
+ vp8_advance_fpmm(cpi, 1);
}
- if (tmp_motion_map != 0)
- vpx_free(tmp_motion_map);
-
-#endif
-
+ // Flag the use of weights in the temporal filter
+ cpi->use_weighted_temporal_filter = 1;
}
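For reference, the buffer vp8_input_fpmm() walks holds one packet per frame: a FIRSTPASS_STATS header immediately followed by one motion-map byte per macroblock, padded to 8 bytes by vp8_firstpass_stats_sz(). A hedged sketch of that walk, assuming the encoder headers defining FIRSTPASS_STATS and declaring vp8_firstpass_stats_sz() are included; gather_motion_maps/dst are hypothetical names:

#include <string.h>

/* Stack the motion maps of n_frames consecutive stats packets into dst,
 * mirroring the loop above. */
static void gather_motion_maps(const unsigned char *pkt, unsigned char *dst,
                               int n_frames, unsigned int mbs)
{
    int i;

    for (i = 0; i < n_frames; i++)
    {
        memcpy(dst + i * mbs, pkt + sizeof(FIRSTPASS_STATS), mbs);
        pkt += vp8_firstpass_stats_sz(mbs);    /* step to the next packet */
    }
}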
void vp8_init_first_pass(VP8_COMP *cpi)
{
- vp8_zero_stats(&cpi->total_stats);
-
-#ifdef FIRSTPASS_MM
- cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "wb");
-#endif
+ vp8_zero_stats(cpi->total_stats);
// TEMP debug code
#ifdef OUTPUT_FPF
@@ -428,6 +416,8 @@ void vp8_init_first_pass(VP8_COMP *cpi)
FILE *fpfile;
fpfile = fopen("firstpass.stt", "w");
fclose(fpfile);
+ fpfile = fopen("fpmotionmap.stt", "wb");
+ fclose(fpfile);
}
#endif
@@ -435,16 +425,10 @@ void vp8_init_first_pass(VP8_COMP *cpi)
void vp8_end_first_pass(VP8_COMP *cpi)
{
- vp8_output_stats(cpi->output_pkt_list, &cpi->total_stats);
-
-#ifdef FIRSTPASS_MM
-
- if (cpi->fp_motion_mapfile)
- fclose(cpi->fp_motion_mapfile);
+ vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
+}
-#endif
-}
void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
{
MACROBLOCKD * const xd = & x->e_mbd;
@@ -478,12 +462,11 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
int step_param = 3; //3; // Don't search over full range for first pass
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
int n;
- vp8_variance_fn_ptr_t v_fn_ptr;
+ vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
int new_mv_mode_penalty = 256;
+ // override the default variance function to use MSE
v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
- v_fn_ptr.sdf = cpi->fn_ptr.sdf;
- v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df;
// Set up pointers for this macro block recon buffer
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
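Overriding vf with mse16x16 matters because variance subtracts the block mean: a predictor that is wrong by a constant DC offset scores zero variance but a large MSE, and first-pass error accounting needs to see that offset. A toy two-pixel illustration with hypothetical values:

#include <stdio.h>

int main(void)
{
    int src[2] = {10, 20}, pred[2] = {60, 70};    /* every pixel off by 50 */
    double d0 = src[0] - pred[0], d1 = src[1] - pred[1];
    double mean = (d0 + d1) / 2.0;
    double var = ((d0 - mean) * (d0 - mean) + (d1 - mean) * (d1 - mean)) / 2.0;
    double mse = (d0 * d0 + d1 * d1) / 2.0;

    printf("variance %.0f, mse %.0f\n", var, mse);  /* variance 0, mse 2500 */
    return 0;
}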
@@ -535,8 +518,11 @@ void vp8_first_pass(VP8_COMP *cpi)
int col_blocks = 4 * cm->mb_cols;
int recon_yoffset, recon_uvoffset;
- int recon_y_stride = cm->last_frame.y_stride;
- int recon_uv_stride = cm->last_frame.uv_stride;
+ YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+ YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+ YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+ int recon_y_stride = lst_yv12->y_stride;
+ int recon_uv_stride = lst_yv12->uv_stride;
int intra_error = 0;
int coded_error = 0;
@@ -558,8 +544,12 @@ void vp8_first_pass(VP8_COMP *cpi)
vp8_clear_system_state(); //__asm emms;
x->src = * cpi->Source;
- xd->pre = cm->last_frame;
- xd->dst = cm->new_frame;
+ xd->pre = *lst_yv12;
+ xd->dst = *new_yv12;
+
+ x->partition_info = x->pi;
+
+ xd->mode_info_context = cm->mi;
vp8_build_block_offsets(x);
@@ -568,7 +558,7 @@ void vp8_first_pass(VP8_COMP *cpi)
vp8_setup_block_ptrs(x);
// set up frame new frame for intra coded blocks
- vp8_setup_intra_recon(&cm->new_frame);
+ vp8_setup_intra_recon(new_yv12);
vp8cx_frame_init_quantizer(cpi);
// Initialise the MV cost table to the defaults
@@ -595,12 +585,14 @@ void vp8_first_pass(VP8_COMP *cpi)
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int this_error;
+ int zero_error;
+ int zz_to_best_ratio;
int gf_motion_error = INT_MAX;
int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
- xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
- xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
- xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+ xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+ xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
// do intra 16x16 prediction
@@ -616,7 +608,7 @@ void vp8_first_pass(VP8_COMP *cpi)
intra_error += this_error;
// Indicate default assumption of intra in the motion map
- *fp_motion_map_ptr = 2;
+ *fp_motion_map_ptr = 0;
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
@@ -634,18 +626,25 @@ void vp8_first_pass(VP8_COMP *cpi)
int motion_error = INT_MAX;
// Simple 0,0 motion with no mv overhead
- vp8_zz_motion_search( cpi, x, &cm->last_frame, &motion_error, recon_yoffset );
+ vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
d->bmi.mv.as_mv.row = 0;
d->bmi.mv.as_mv.col = 0;
- // Test last reference frame using the previous best mv as the starting point (best reference) for the search
- vp8_first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, &cm->last_frame, &motion_error, recon_yoffset);
+ // Save (0,0) error for later use
+ zero_error = motion_error;
+
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search
+ vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
+ &d->bmi.mv.as_mv, lst_yv12,
+ &motion_error, recon_yoffset);
// If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0))
{
tmp_err = INT_MAX;
- vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->last_frame, &motion_error, recon_yoffset);
+ vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
+ lst_yv12, &tmp_err, recon_yoffset);
if ( tmp_err < motion_error )
{
@@ -659,7 +658,7 @@ void vp8_first_pass(VP8_COMP *cpi)
// Experimental search in a second reference frame ((0,0) based only)
if (cm->current_video_frame > 1)
{
- vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->golden_frame, &gf_motion_error, recon_yoffset);
+ vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
{
@@ -677,9 +676,9 @@ void vp8_first_pass(VP8_COMP *cpi)
// Reset to last frame as reference buffer
- xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset;
- xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset;
+ xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+ xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
}
if (motion_error <= this_error)
@@ -707,8 +706,6 @@ void vp8_first_pass(VP8_COMP *cpi)
{
mvcount++;
- *fp_motion_map_ptr = 1;
-
// Does the Row vector point inwards or outwards
if (mb_row < cm->mb_rows / 2)
{
@@ -740,12 +737,30 @@ void vp8_first_pass(VP8_COMP *cpi)
else if (d->bmi.mv.as_mv.col < 0)
sum_in_vectors--;
}
+
+ // Compute how close (0,0) predictor is to best
+ // predictor in terms of their prediction error
+ zz_to_best_ratio = (10*zero_error + this_error/2)
+ / (this_error+!this_error);
+
+ if ((zero_error < 50000) &&
+ (zz_to_best_ratio <= 11) )
+ *fp_motion_map_ptr = 1;
+ else
+ *fp_motion_map_ptr = 0;
}
else
- *fp_motion_map_ptr = 0; // 0,0 mv was best
+ {
+ // 0,0 mv was best
+ if( zero_error<50000 )
+ *fp_motion_map_ptr = 2;
+ else
+ *fp_motion_map_ptr = 1;
+ }
}
else
{
+ // Intra was best
best_ref_mv.row = 0;
best_ref_mv.col = 0;
}
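Read together, the map values written above form a three-level confidence score for the later temporal filter: 2 means (0,0) was best and the block is quiet, 1 means near-static, 0 means moving, noisy, or intra. A hedged sketch of the same decision, with the hypothetical helper name fp_map_value:

/* Sketch only; mirrors the branches above. */
static int fp_map_value(int zero_error, int this_error, int zero_mv_was_best)
{
    int zz_to_best_ratio;

    if (zero_mv_was_best)
        return (zero_error < 50000) ? 2 : 1;    /* quiet static vs. noisy */

    /* how close the (0,0) predictor came to the best predictor, scaled x10 */
    zz_to_best_ratio = (10 * zero_error + this_error / 2)
                       / (this_error + !this_error);

    return (zero_error < 50000 && zz_to_best_ratio <= 11) ? 1 : 0;
}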
@@ -771,7 +786,7 @@ void vp8_first_pass(VP8_COMP *cpi)
x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
//extend the recon for intra prediction
- vp8_extend_mb_row(&cm->new_frame, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
vp8_clear_system_state(); //__asm emms;
}
@@ -823,31 +838,32 @@ void vp8_first_pass(VP8_COMP *cpi)
fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
// don't want to do outputstats with a stack variable!
- cpi->this_frame_stats = fps;
- vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats);
- vp8_accumulate_stats(&cpi->total_stats, &fps);
-
-#ifdef FIRSTPASS_MM
- fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile);
-#endif
+ memcpy(cpi->this_frame_stats,
+ &fps,
+ sizeof(FIRSTPASS_STATS));
+ memcpy((char*)cpi->this_frame_stats + sizeof(FIRSTPASS_STATS),
+ cpi->fp_motion_map,
+ sizeof(cpi->fp_motion_map[0]) * cpi->common.MBs);
+ vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
+ vp8_accumulate_stats(cpi->total_stats, &fps);
}
// Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
if ((cm->current_video_frame > 0) &&
- (cpi->this_frame_stats.pcnt_inter > 0.20) &&
- ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0))
+ (cpi->this_frame_stats->pcnt_inter > 0.20) &&
+ ((cpi->this_frame_stats->intra_error / cpi->this_frame_stats->coded_error) > 2.0))
{
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+ vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
}
// swap frame pointers so last frame refers to the frame we just compressed
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
- vp8_yv12_extend_frame_borders(&cm->last_frame);
+ vp8_swap_yv12_buffer(lst_yv12, new_yv12);
+ vp8_yv12_extend_frame_borders(lst_yv12);
// Special case for the first frame. Copy into the GF buffer as a second reference.
if (cm->current_video_frame == 0)
{
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+ vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
}
@@ -863,7 +879,7 @@ void vp8_first_pass(VP8_COMP *cpi)
else
recon_file = fopen(filename, "ab");
- fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+ if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file));
fclose(recon_file);
}
@@ -1104,33 +1120,33 @@ void vp8_init_second_pass(VP8_COMP *cpi)
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
- vp8_zero_stats(&cpi->total_stats);
+ vp8_zero_stats(cpi->total_stats);
if (!cpi->stats_in_end)
return;
- cpi->total_stats = *cpi->stats_in_end;
+ *cpi->total_stats = *cpi->stats_in_end;
- cpi->total_error_left = cpi->total_stats.ssim_weighted_pred_err;
- cpi->total_intra_error_left = cpi->total_stats.intra_error;
- cpi->total_coded_error_left = cpi->total_stats.coded_error;
+ cpi->total_error_left = cpi->total_stats->ssim_weighted_pred_err;
+ cpi->total_intra_error_left = cpi->total_stats->intra_error;
+ cpi->total_coded_error_left = cpi->total_stats->coded_error;
cpi->start_tot_err_left = cpi->total_error_left;
- //cpi->bits_left = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
- //cpi->bits_left -= (long long)(cpi->total_stats.count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+ //cpi->bits_left = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+ //cpi->bits_left -= (long long)(cpi->total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
// each frame can have a different duration, as the frame rate in the source
// isn't guaranteed to be constant. The frame rate prior to the first frame
// encoded in the second pass is a guess. However the sum duration is not.
// It's calculated based on the actual durations of all frames from the first
// pass.
- vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats.count / cpi->total_stats.duration);
+ vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats->count / cpi->total_stats->duration);
cpi->output_frame_rate = cpi->oxcf.frame_rate;
- cpi->bits_left = (long long)(cpi->total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
- cpi->bits_left -= (long long)(cpi->total_stats.duration * two_pass_min_rate / 10000000.0);
+ cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
+ cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
- vp8_avg_stats(&cpi->total_stats);
+ vp8_avg_stats(cpi->total_stats);
// Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
{
@@ -1146,7 +1162,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
sum_iiratio += IIRatio;
}
- cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats.count);
+ cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats->count);
// Reset file position
reset_fpf_position(cpi, start_pos);
@@ -1168,21 +1184,11 @@ void vp8_init_second_pass(VP8_COMP *cpi)
}
-#ifdef FIRSTPASS_MM
- cpi->fp_motion_mapfile = 0;
- cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb");
-#endif
-
+ cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
}
void vp8_end_second_pass(VP8_COMP *cpi)
{
-#ifdef FIRSTPASS_MM
-
- if (cpi->fp_motion_mapfile)
- fclose(cpi->fp_motion_mapfile);
-
-#endif
}
// Analyse and define a gf/arf group.
@@ -1191,7 +1197,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
FIRSTPASS_STATS next_frame;
FIRSTPASS_STATS *start_pos;
int i;
- int image_size = cpi->common.last_frame.y_width * cpi->common.last_frame.y_height;
+ int y_width = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_width;
+ int y_height = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_height;
+ int image_size = y_width * y_height;
double boost_score = 0.0;
double old_boost_score = 0.0;
double gf_group_err = 0.0;
@@ -1200,10 +1208,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double mv_accumulator_rabs = 0.0;
double mv_accumulator_cabs = 0.0;
- double this_mv_rabs;
- double this_mv_cabs;
double mv_ratio_accumulator = 0.0;
- double distance_factor = 0.0;
double decay_accumulator = 1.0;
double boost_factor = IIFACTOR;
@@ -1216,21 +1221,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int max_bits = frame_max_bits(cpi); // Max for a single frame
-#ifdef FIRSTPASS_MM
- int fpmm_pos;
-#endif
+ unsigned char *fpmm_pos;
cpi->gf_group_bits = 0;
cpi->gf_decay_rate = 0;
vp8_clear_system_state(); //__asm emms;
-#ifdef FIRSTPASS_MM
fpmm_pos = vp8_fpmm_get_pos(cpi);
-#endif
start_pos = cpi->stats_in;
+ vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
// Preload the stats for the next frame.
mod_frame_err = calculate_modified_err(cpi, this_frame);
@@ -1250,9 +1253,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
{
double r;
- double motion_factor;
double this_frame_mvr_ratio;
double this_frame_mvc_ratio;
+ double motion_decay;
+ double motion_pct = next_frame.pcnt_motion;
i++; // Increment the loop counter
@@ -1267,12 +1271,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
break;
// Accumulate motion stats.
- motion_factor = next_frame.pcnt_motion;
- this_mv_rabs = fabs(next_frame.mvr_abs * motion_factor);
- this_mv_cabs = fabs(next_frame.mvc_abs * motion_factor);
-
- mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_factor);
- mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_factor);
+ mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct);
+ mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct);
//Accumulate Motion In/Out of frame stats
this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion;
@@ -1280,13 +1280,23 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
// If there is a significant amount of motion
- if (motion_factor > 0.05)
+ if (motion_pct > 0.05)
{
- this_frame_mvr_ratio = fabs(next_frame.mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr));
- this_frame_mvc_ratio = fabs(next_frame.mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc));
+ this_frame_mvr_ratio = fabs(next_frame.mvr_abs) /
+ DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr));
+
+ this_frame_mvc_ratio = fabs(next_frame.mvc_abs) /
+ DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc));
- mv_ratio_accumulator += (this_frame_mvr_ratio < next_frame.mvr_abs) ? (this_frame_mvr_ratio * motion_factor) : next_frame.mvr_abs * motion_factor;
- mv_ratio_accumulator += (this_frame_mvc_ratio < next_frame.mvc_abs) ? (this_frame_mvc_ratio * motion_factor) : next_frame.mvc_abs * motion_factor;
+ mv_ratio_accumulator +=
+ (this_frame_mvr_ratio < next_frame.mvr_abs)
+ ? (this_frame_mvr_ratio * motion_pct)
+ : next_frame.mvr_abs * motion_pct;
+
+ mv_ratio_accumulator +=
+ (this_frame_mvc_ratio < next_frame.mvc_abs)
+ ? (this_frame_mvc_ratio * motion_pct)
+ : next_frame.mvc_abs * motion_pct;
}
else
{
@@ -1314,14 +1324,26 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
loop_decay_rate = next_frame.pcnt_inter;
// High % motion -> somewhat higher decay rate
- if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate)
- loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0));
-
- distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + (this_mv_cabs * this_mv_cabs)) / 300.0;
- distance_factor = ((distance_factor > 1.0) ? 0.0 : (1.0 - distance_factor));
+ motion_decay = (1.0 - (motion_pct / 20.0));
+ if (motion_decay < loop_decay_rate)
+ loop_decay_rate = motion_decay;
- if (distance_factor < loop_decay_rate)
- loop_decay_rate = distance_factor;
+ // Adjustment to decay rate based on speed of motion
+ {
+ double this_mv_rabs;
+ double this_mv_cabs;
+ double distance_factor;
+
+ this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
+ this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
+
+ distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
+ (this_mv_cabs * this_mv_cabs)) / 250.0;
+ distance_factor = ((distance_factor > 1.0)
+ ? 0.0 : (1.0 - distance_factor));
+ if (distance_factor < loop_decay_rate)
+ loop_decay_rate = distance_factor;
+ }
// Cumulative effect of decay
decay_accumulator = decay_accumulator * loop_decay_rate;
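The decay applied above is the most pessimistic of three per-frame estimates: the inter-coded fraction, a high-motion penalty, and a motion-speed penalty. A sketch of that combination under the same constants, with the hypothetical function name frame_decay:

#include <math.h>

/* Combine the three decay estimates used above; a smaller result means the
 * group's predictive value falls off faster. */
static double frame_decay(double pcnt_inter, double pcnt_motion,
                          double mvr_abs, double mvc_abs)
{
    double decay = pcnt_inter;                        /* base estimate */
    double motion_decay = 1.0 - pcnt_motion / 20.0;   /* % motion penalty */
    double rabs = fabs(mvr_abs * pcnt_motion);
    double cabs = fabs(mvc_abs * pcnt_motion);
    double dist = sqrt(rabs * rabs + cabs * cabs) / 250.0;
    double distance_factor = (dist > 1.0) ? 0.0 : 1.0 - dist;

    if (motion_decay < decay)
        decay = motion_decay;
    if (distance_factor < decay)
        decay = distance_factor;
    return decay;
}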
@@ -1387,6 +1409,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Should we use the alternate reference frame
if (cpi->oxcf.play_alternate &&
+ cpi->oxcf.lag_in_frames &&
(i >= MIN_GF_INTERVAL) &&
(i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&      // don't use ARF very near next kf
(((next_frame.pcnt_inter > 0.75) &&
@@ -1435,6 +1458,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
if (tmp_q < cpi->worst_quality)
{
+ int half_gf_int;
+ int frames_after_arf;
+ int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
+ int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+
cpi->source_alt_ref_pending = TRUE;
// For alt ref frames the error score for the end frame of the group (the alt ref frame) should not contribute to the group total and hence
@@ -1445,22 +1473,63 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// The future frame itself is part of the next group
cpi->baseline_gf_interval = i - 1;
-#ifdef FIRSTPASS_MM
- // Read through the motion map to load up the entry for the ARF
+ // Define the arnr filter width for this group of frames:
+ // We only filter frames that lie within a distance of half
+ // the GF interval from the ARF frame. We also have to trap
+    // cases where the filter extends beyond the end of the clip.
+ // Note: this_frame->frame has been updated in the loop
+ // so it now points at the ARF frame.
+ half_gf_int = cpi->baseline_gf_interval >> 1;
+ frames_after_arf = cpi->total_stats->count - this_frame->frame - 1;
+
+ switch (cpi->oxcf.arnr_type)
{
- int j;
+ case 1: // Backward filter
+ frames_fwd = 0;
+ if (frames_bwd > half_gf_int)
+ frames_bwd = half_gf_int;
+ break;
- // Advance to the region of interest
- // Current default 2 frames before to 2 frames after the ARF frame itsef
- vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos);
+ case 2: // Forward filter
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ frames_bwd = 0;
+ break;
+
+ case 3: // Centered filter
+ default:
+ frames_fwd >>= 1;
+ if (frames_fwd > frames_after_arf)
+ frames_fwd = frames_after_arf;
+ if (frames_fwd > half_gf_int)
+ frames_fwd = half_gf_int;
- for (j = 0; j < cpi->baseline_gf_interval - 2; j++)
- vp8_advance_fpmm(cpi, 1);
+ frames_bwd = frames_fwd;
+
+ // For even length filter there is one more frame backward
+ // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ if (frames_bwd < half_gf_int)
+ frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1;
+ break;
+ }
+
+ cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
+
+ {
+ // Advance to & read in the motion map for those frames
+ // to be considered for filtering based on the position
+ // of the ARF
+ vp8_fpmm_reset_pos(cpi, cpi->fp_motion_map_stats_save);
+
+ // Position at the 'earliest' frame to be filtered
+ vp8_advance_fpmm(cpi,
+ cpi->baseline_gf_interval - frames_bwd);
// Read / create a motion map for the region of interest
- vp8_input_fpmm(cpi, 5);
+ vp8_input_fpmm(cpi);
}
-#endif
}
else
{
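Worked through for the default (centered) case: with arnr_max_frames = 6, baseline_gf_interval = 10 and the ARF well before the end of the clip, half_gf_int = 5, frames_fwd = 5 >> 1 = 2 and frames_bwd = 2 + ((6 + 1) & 1) = 3, giving active_arnr_frames = 3 + 1 + 2 = 6, the bbbAff shape the comment above describes. A sketch of just that branch, hypothetical function name:

/* Standalone version of the centered (default) case above. */
static int active_arnr_frames_centered(int arnr_max_frames, int half_gf_int,
                                       int frames_after_arf)
{
    int frames_fwd = (arnr_max_frames - 1) >> 1;
    int frames_bwd;

    if (frames_fwd > frames_after_arf)
        frames_fwd = frames_after_arf;
    if (frames_fwd > half_gf_int)
        frames_fwd = half_gf_int;

    frames_bwd = frames_fwd;
    /* an even filter length puts its extra frame on the backward side */
    if (frames_bwd < half_gf_int)
        frames_bwd += (arnr_max_frames + 1) & 1;

    return frames_bwd + 1 + frames_fwd;
}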
@@ -1496,7 +1565,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Now decide how many bits should be allocated to the GF group as a proportion of those remaining in the kf group.
// The final key frame group in the clip is treated as a special case where cpi->kf_group_bits is tied to cpi->bits_left.
// This is also important for short clips where there may only be one key frame.
- if (cpi->frames_to_key >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+ if (cpi->frames_to_key >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
{
cpi->kf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;
}
@@ -1565,26 +1634,36 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Calculate the number of bits to be spent on the gf or arf based on the boost number
cpi->gf_bits = (int)((double)Boost * (cpi->gf_group_bits / (double)allocation_chunks));
- // If the frame that is to be boosted is simpler than the average for the gf/arf group then use an alternative calculation
+ // If the frame that is to be boosted is simpler than the average for
+ // the gf/arf group then use an alternative calculation
// based on the error score of the frame itself
if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval)
{
double alt_gf_grp_bits;
int alt_gf_bits;
- alt_gf_grp_bits = ((double)cpi->kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / (double)cpi->kf_group_error_left) ;
- alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks));
+ alt_gf_grp_bits =
+ (double)cpi->kf_group_bits *
+ (mod_frame_err * (double)cpi->baseline_gf_interval) /
+ DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left);
+
+ alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits /
+ (double)allocation_chunks));
if (cpi->gf_bits > alt_gf_bits)
{
cpi->gf_bits = alt_gf_bits;
}
}
- // Else if it is harder than other frames in the group make sure it at least receives an allocation in keeping with
- // its relative error score, otherwise it may be worse off than an "un-boosted" frame
+ // Else if it is harder than other frames in the group make sure it at
+ // least receives an allocation in keeping with its relative error
+ // score, otherwise it may be worse off than an "un-boosted" frame
else
{
- int alt_gf_bits = (int)((double)cpi->kf_group_bits * (mod_frame_err / (double)cpi->kf_group_error_left));
+ int alt_gf_bits =
+ (int)((double)cpi->kf_group_bits *
+ mod_frame_err /
+ DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left));
if (alt_gf_bits > cpi->gf_bits)
{
@@ -1686,10 +1765,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
reset_fpf_position(cpi, start_pos);
}
-#ifdef FIRSTPASS_MM
// Reset the first pass motion map position
vp8_fpmm_reset_pos(cpi, fpmm_pos);
-#endif
}
// Allocate bits to a normal frame that is neither a gf, an arf, nor a key frame.
@@ -1703,7 +1780,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int max_bits = frame_max_bits(cpi); // Max for a single frame
// The final few frames have special treatment
- if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+ if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
{
cpi->gf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;
}
@@ -1748,7 +1825,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
void vp8_second_pass(VP8_COMP *cpi)
{
int tmp_q;
- int frames_left = (int)(cpi->total_stats.count - cpi->common.current_video_frame);
+ int frames_left = (int)(cpi->total_stats->count - cpi->common.current_video_frame);
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
@@ -1771,11 +1848,12 @@ void vp8_second_pass(VP8_COMP *cpi)
if (EOF == vp8_input_stats(cpi, &this_frame))
return;
-#ifdef FIRSTPASS_MM
- vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
- cpi->fpmm_pos = vp8_fpmm_get_pos(cpi);
- vp8_advance_fpmm(cpi, 1); // Read this frame's first pass motion map
-#endif
+ vpx_memset(cpi->fp_motion_map, 0,
+ cpi->oxcf.arnr_max_frames*cpi->common.MBs);
+ cpi->fp_motion_map_stats_save = vp8_fpmm_get_pos(cpi);
+
+ // Step over this frame's first pass motion map
+ vp8_advance_fpmm(cpi, 1);
this_frame_error = this_frame.ssim_weighted_pred_err;
this_frame_intra_error = this_frame.intra_error;
@@ -1868,6 +1946,18 @@ void vp8_second_pass(VP8_COMP *cpi)
}
}
+ // Keep a globally available copy of this and the next frame's iiratio.
+ cpi->this_iiratio = this_frame_intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+ {
+ FIRSTPASS_STATS next_frame;
+ if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
+ {
+ cpi->next_iiratio = next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+ }
+ }
+
// Set nominal per second bandwidth for this frame
cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
if (cpi->target_bandwidth < 0)
@@ -2025,6 +2115,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double kf_group_coded_err = 0.0;
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
+ vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
vp8_clear_system_state(); //__asm emms;
start_position = cpi->stats_in;
@@ -2041,7 +2133,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Take a copy of the initial frame details
vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
- cpi->kf_group_bits = 0; // Estimate of total bits avaialable to kf group
+    cpi->kf_group_bits = 0;        // Total bits available to kf group
cpi->kf_group_error_left = 0; // Group modified error score.
kf_mod_err = calculate_modified_err(cpi, this_frame);
@@ -2057,33 +2149,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
kf_group_intra_err += this_frame->intra_error;
kf_group_coded_err += this_frame->coded_error;
+        // load the next frame's stats
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
+ vp8_input_stats(cpi, this_frame);
// Provided that we are not at the end of the file...
- if (EOF != vp8_input_stats(cpi, this_frame))
+ if (cpi->oxcf.auto_key
+ && lookup_next_frame_stats(cpi, &next_frame) != EOF)
{
- if (lookup_next_frame_stats(cpi, &next_frame) != EOF)
- {
- if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
- break;
- }
- }
-
- // Step on to the next frame
- cpi->frames_to_key ++;
+ if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
+ break;
- // If we don't have a real key frame within the next two
- // forcekeyframeevery intervals then break out of the loop.
- if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
- break;
+ // Step on to the next frame
+ cpi->frames_to_key ++;
+ // If we don't have a real key frame within the next two
+ // forcekeyframeevery intervals then break out of the loop.
+ if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
+ break;
+ } else
+ cpi->frames_to_key ++;
}
// If there is a max kf interval set by the user we must obey it.
// We already break out of the loop above at 2x max.
// This code centers the extra kf if the actual natural
// interval is between 1x and 2x
- if ( cpi->frames_to_key > (int)cpi->key_frame_frequency )
+ if (cpi->oxcf.auto_key
+ && cpi->frames_to_key > (int)cpi->key_frame_frequency )
{
cpi->frames_to_key /= 2;
@@ -2108,39 +2201,64 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Calculate the number of bits that should be assigned to the kf group.
if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
{
- int max_bits = frame_max_bits(cpi); // Max for a single normal frame (not key frame)
+ // Max for a single normal frame (not key frame)
+ int max_bits = frame_max_bits(cpi);
+
+ // Maximum bits for the kf group
+ long long max_grp_bits;
- // Default allocation based on bits left and relative complexity of the section
- cpi->kf_group_bits = (int)(cpi->bits_left * (kf_group_err / cpi->modified_total_error_left));
+ // Default allocation based on bits left and relative
+ // complexity of the section
+ cpi->kf_group_bits = (long long)( cpi->bits_left *
+ ( kf_group_err /
+ cpi->modified_total_error_left ));
// Clip based on maximum per frame rate defined by the user.
- if (cpi->kf_group_bits > max_bits * cpi->frames_to_key)
- cpi->kf_group_bits = max_bits * cpi->frames_to_key;
+ max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
+ if (cpi->kf_group_bits > max_grp_bits)
+ cpi->kf_group_bits = max_grp_bits;
// Additional special case for CBR if buffer is getting full.
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
- // If the buffer is near or above the optimal and this kf group is not being allocated much
- // then increase the allocation a bit.
- if (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level)
+ int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+ int buffer_lvl = cpi->buffer_level;
+
+ // If the buffer is near or above the optimal and this kf group is
+ // not being allocated much then increase the allocation a bit.
+ if (buffer_lvl >= opt_buffer_lvl)
{
- int high_water_mark = (cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1;
- int min_group_bits;
+ int high_water_mark = (opt_buffer_lvl +
+ cpi->oxcf.maximum_buffer_size) >> 1;
+
+ long long av_group_bits;
+
+ // Av bits per frame * number of frames
+ av_group_bits = (long long)cpi->av_per_frame_bandwidth *
+ (long long)cpi->frames_to_key;
// We are at or above the maximum.
if (cpi->buffer_level >= high_water_mark)
{
- min_group_bits = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) + (cpi->buffer_level - high_water_mark);
+ long long min_group_bits;
+
+ min_group_bits = av_group_bits +
+ (long long)(buffer_lvl -
+ high_water_mark);
if (cpi->kf_group_bits < min_group_bits)
cpi->kf_group_bits = min_group_bits;
}
// We are above optimal but below the maximum
- else if (cpi->kf_group_bits < (cpi->av_per_frame_bandwidth * cpi->frames_to_key))
+ else if (cpi->kf_group_bits < av_group_bits)
{
- int bits_below_av = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) - cpi->kf_group_bits;
- cpi->kf_group_bits += (int)((double)bits_below_av * (double)(cpi->buffer_level - cpi->oxcf.optimal_buffer_level) /
- (double)(high_water_mark - cpi->oxcf.optimal_buffer_level));
+ long long bits_below_av = av_group_bits -
+ cpi->kf_group_bits;
+
+ cpi->kf_group_bits +=
+ (long long)((double)bits_below_av *
+ (double)(buffer_lvl - opt_buffer_lvl) /
+ (double)(high_water_mark - opt_buffer_lvl));
}
}
}
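Worked numbers for the "above optimal but below the maximum" branch, all hypothetical: optimal_buffer_level = 4,000,000 bits and maximum_buffer_size = 8,000,000 give high_water_mark = 6,000,000; a buffer sitting halfway up at 5,000,000 restores half of a 1,000,000-bit shortfall:

#include <stdio.h>

int main(void)
{
    long long opt_lvl = 4000000, max_sz = 8000000, buf_lvl = 5000000;
    long long high_water_mark = (opt_lvl + max_sz) >> 1;     /* 6000000 */
    long long bits_below_av = 1000000;                       /* shortfall vs avg */
    long long top_up = (long long)((double)bits_below_av *
                                   (double)(buf_lvl - opt_lvl) /
                                   (double)(high_water_mark - opt_lvl));

    printf("%lld\n", top_up);                                /* prints 500000 */
    return 0;
}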
@@ -2159,6 +2277,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
for (i = 0 ; i < cpi->frames_to_key ; i++)
{
double r;
+ double motion_decay;
+ double motion_pct = next_frame.pcnt_motion;
if (EOF == vp8_input_stats(cpi, &next_frame))
break;
@@ -2172,10 +2292,30 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
//if ( next_frame.pcnt_inter < loop_decay_rate )
loop_decay_rate = next_frame.pcnt_inter;
- if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate)
- loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0));
+ // High % motion -> somewhat higher decay rate
+ motion_decay = (1.0 - (motion_pct / 20.0));
+ if (motion_decay < loop_decay_rate)
+ loop_decay_rate = motion_decay;
+
+ // Adjustment to decay rate based on speed of motion
+ {
+ double this_mv_rabs;
+ double this_mv_cabs;
+ double distance_factor;
+
+ this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
+ this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
+
+ distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
+ (this_mv_cabs * this_mv_cabs)) / 250.0;
+ distance_factor = ((distance_factor > 1.0)
+ ? 0.0 : (1.0 - distance_factor));
+ if (distance_factor < loop_decay_rate)
+ loop_decay_rate = distance_factor;
+ }
decay_accumulator = decay_accumulator * loop_decay_rate;
+ decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
boost_score += (decay_accumulator * r);
@@ -2266,7 +2406,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int allocation_chunks;
int Counter = cpi->frames_to_key;
int alt_kf_bits;
-
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
// Min boost based on kf interval
#if 0
@@ -2286,10 +2426,10 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
// bigger frame sizes need larger kf boosts, smaller frames smaller boosts...
- if ((cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) > (320 * 240))
- kf_boost += 2 * (cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) / (320 * 240);
- else if ((cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) < (320 * 240))
- kf_boost -= 4 * (320 * 240) / (cpi->common.last_frame.y_width * cpi->common.last_frame.y_height);
+ if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240))
+ kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240);
+ else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240))
+ kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height);
kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100
@@ -2325,23 +2465,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->kf_bits = (3 * cpi->buffer_level) >> 2;
}
- // If the key frame is actually easier than the average for the kf group (which does sometimes happen... eg a blank intro frame)
- // Then use an alternate calculation based on the kf error score which should give a smaller key frame.
+ // If the key frame is actually easier than the average for the
+ // kf group (which does sometimes happen... eg a blank intro frame)
+ // Then use an alternate calculation based on the kf error score
+ // which should give a smaller key frame.
if (kf_mod_err < kf_group_err / cpi->frames_to_key)
{
- double alt_kf_grp_bits = ((double)cpi->bits_left * (kf_mod_err * (double)cpi->frames_to_key) / cpi->modified_total_error_left) ;
+ double alt_kf_grp_bits =
+ ((double)cpi->bits_left *
+ (kf_mod_err * (double)cpi->frames_to_key) /
+ DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
- alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks));
+ alt_kf_bits = (int)((double)kf_boost *
+ (alt_kf_grp_bits / (double)allocation_chunks));
if (cpi->kf_bits > alt_kf_bits)
{
cpi->kf_bits = alt_kf_bits;
}
}
- // Else if it is much harder than other frames in the group make sure it at least receives an allocation in keeping with its relative error score
+ // Else if it is much harder than other frames in the group make sure
+ // it at least receives an allocation in keeping with its relative
+ // error score
else
{
- alt_kf_bits = (int)((double)cpi->bits_left * (kf_mod_err / cpi->modified_total_error_left));
+ alt_kf_bits =
+ (int)((double)cpi->bits_left *
+ (kf_mod_err /
+ DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
if (alt_kf_bits > cpi->kf_bits)
{
@@ -2391,7 +2542,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->common.vert_scale = NORMAL;
// Calculate Average bits per frame.
- //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats.count - cpi->common.current_video_frame);
+ //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats->count - cpi->common.current_video_frame);
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate);
//if ( av_bits_per_frame < 0.0 )
// av_bits_per_frame = 0.0
@@ -2435,7 +2586,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (0)
{
FILE *f = fopen("Subsamle.stt", "a");
- fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n", cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+ fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n", cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
fclose(f);
}
@@ -2454,7 +2605,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
{
- long long clip_bits = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+ long long clip_bits = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
long long over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
long long over_spend2 = cpi->oxcf.starting_buffer_level - projected_buffer_level;
@@ -2493,7 +2644,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (0)
{
FILE *f = fopen("Subsamle.stt", "a");
- fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n", kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+ fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n", kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
fclose(f);
}
}
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index d7b52f3f3..95e1e5463 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -19,4 +20,5 @@ extern void vp8_init_second_pass(VP8_COMP *cpi);
extern void vp8_second_pass(VP8_COMP *cpi);
extern void vp8_end_second_pass(VP8_COMP *cpi);
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count);
#endif
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 52aab6642..824af5e46 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,6 +15,7 @@
void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -38,6 +40,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
+ cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
+ cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
+ cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
+ cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
+ cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
+
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
@@ -55,6 +63,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
@@ -67,8 +78,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;
+ cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
+ cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_c;
@@ -93,4 +104,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
vp8_arch_x86_encoder_init(cpi);
#endif
+#if ARCH_ARM
+ vp8_arch_arm_encoder_init(cpi);
+#endif
+
}
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 2a2de3d0a..bb85afa6f 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -185,7 +186,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motion vector
#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
+#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
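The "+ 128) >> 8" inside MVC() above is a rounded fixed-point divide by 256, i.e. round(bits * error_per_bit / 256) for non-negative operands, which puts the mv bit cost in the same units as the distortion it is added to in ERR(). A quick check with hypothetical numbers:

#include <stdio.h>

int main(void)
{
    int mv_bits = 300, error_per_bit = 90;             /* hypothetical values */
    int cost = (mv_bits * error_per_bit + 128) >> 8;   /* (27000 + 128) >> 8 */

    printf("%d\n", cost);                              /* prints 105 */
    return 0;
}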
@@ -194,7 +195,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
{
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
unsigned char *z = (*(b->base_src) + b->src);
@@ -219,7 +220,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
bestmv->col <<= 3;
// calculate central point error
- besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
+ besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// TODO: Each subsequent iteration checks at least one point in common with the last iteration (could be 2 if diag selected)
@@ -308,7 +309,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#undef CHECK_BETTER
#undef MIN
#undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
{
int bestmse = INT_MAX;
MV startmv;
@@ -335,13 +336,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
startmv = *bestmv;
// calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+ bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.row = startmv.row;
this_mv.col = ((startmv.col - 8) | 4);
- left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+ left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
@@ -351,7 +352,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
}
this_mv.col += 8;
- right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+ right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@@ -363,7 +364,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
// go up then down and check error
this_mv.col = startmv.col;
this_mv.row = ((startmv.row - 8) | 4);
- up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+ up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
@@ -373,7 +374,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
}
this_mv.row += 8;
- down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+ down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@@ -385,10 +386,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
// now check 1 more diagonal
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
- // whichdir must be 0-4. Therefore, one of the cases below
- // must run through. However, because there is no default
- // and diag is not set elsewhere, we get a compile warning
- diag = 0;
//for(whichdir =0;whichdir<4;whichdir++)
//{
this_mv = startmv;
@@ -398,22 +395,22 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
case 0:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
break;
case 1:
this_mv.col += 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
break;
case 2:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row += 4;
- diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
break;
case 3:
this_mv.col += 4;
this_mv.row += 4;
- diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
break;
}
@@ -445,12 +442,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.col & 7)
{
this_mv.col = startmv.col - 2;
- left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
+ left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
}
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -462,7 +459,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
}
this_mv.col += 4;
- right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@@ -477,12 +474,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.row & 7)
{
this_mv.row = startmv.row - 2;
- up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.row = (startmv.row - 8) | 6;
- up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -494,7 +491,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
}
this_mv.row += 4;
- down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@@ -522,12 +519,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+ diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
}
}
else
@@ -537,12 +534,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
+ diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
}
}
@@ -553,12 +550,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.row & 7)
{
this_mv.row -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.row = (startmv.row - 8) | 6;
- diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
break;
@@ -568,19 +565,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+ diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
}
break;
case 3:
this_mv.col += 2;
this_mv.row += 2;
- diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
break;
}
@@ -597,7 +594,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
return bestmse;
}
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
{
int bestmse = INT_MAX;
MV startmv;
@@ -622,13 +619,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
startmv = *bestmv;
// calculate central point error
- bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+ bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.row = startmv.row;
this_mv.col = ((startmv.col - 8) | 4);
- left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+ left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
@@ -638,7 +635,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
}
this_mv.col += 8;
- right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+ right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@@ -650,7 +647,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
// go up then down and check error
this_mv.col = startmv.col;
this_mv.row = ((startmv.row - 8) | 4);
- up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+ up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
@@ -660,7 +657,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
}
this_mv.row += 8;
- down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+ down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@@ -680,22 +677,22 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
case 0:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
break;
case 1:
this_mv.col += 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
break;
case 2:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row += 4;
- diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
break;
case 3:
this_mv.col += 4;
this_mv.row += 4;
- diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
break;
}
@@ -710,7 +707,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
#else
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
@@ -720,7 +717,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
}
this_mv.col += 8;
- diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
@@ -731,7 +728,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = startmv.row + 4;
- diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
@@ -741,7 +738,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
}
this_mv.col += 8;
- diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+ diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
@@ -757,10 +754,18 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motion vector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
+#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-
+static const MV next_chkpts[6][3] =
+{
+ {{ -2, 0}, { -1, -2}, {1, -2}},
+ {{ -1, -2}, {1, -2}, {2, 0}},
+ {{1, -2}, {2, 0}, {1, 2}},
+ {{2, 0}, {1, 2}, { -1, 2}},
+ {{1, 2}, { -1, 2}, { -2, 0}},
+ {{ -1, 2}, { -2, 0}, { -1, -2}}
+};
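The next_chkpts table added here drives the pruned hexagon refinement below: once the best point moves in hex direction k, only the neighbours of the new centre in directions k-1, k and k+1 (mod 6) are unexplored; the other three coincide with points already scored around the old centre. A standalone sketch (names are illustrative, not patch code) that verifies each table row is just the hex pattern rotated accordingly:

#include <assert.h>
#include <stdio.h>

typedef struct { int row, col; } mv_t;          /* stand-in for MV */

static const mv_t hex[6] =
    { {-1,-2}, {1,-2}, {2,0}, {1,2}, {-1,2}, {-2,0} };
static const mv_t chk[6][3] =
{
    {{-2, 0}, {-1,-2}, {1,-2}},
    {{-1,-2}, {1,-2}, {2, 0}},
    {{1,-2}, {2, 0}, {1, 2}},
    {{2, 0}, {1, 2}, {-1, 2}},
    {{1, 2}, {-1, 2}, {-2, 0}},
    {{-1, 2}, {-2, 0}, {-1,-2}}
};

int main(void)
{
    int k, i;

    /* Each table row is the hex pattern rotated to directions
     * k-1, k, k+1 (mod 6) -- the only neighbours of the new centre
     * not already scored around the old centre. */
    for (k = 0; k < 6; k++)
        for (i = 0; i < 3; i++)
        {
            const mv_t *a = &chk[k][i];
            const mv_t *b = &hex[(k + 5 + i) % 6];
            assert(a->row == b->row && a->col == b->col);
        }

    printf("next_chkpts rows match hex[(k+5+i)%%6]\n");
    return 0;
}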
int vp8_hex_search
(
MACROBLOCK *x,
@@ -771,44 +776,72 @@ int vp8_hex_search
int search_param,
int error_per_bit,
int *num00,
- vp8_variance_fn_t vf,
- vp8_sad_fn_t sf,
+ const vp8_variance_fn_ptr_t *vfp,
int *mvsadcost[2],
int *mvcost[2]
)
{
- MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
+ MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
int i, j;
unsigned char *src = (*(b->base_src) + b->src);
int src_stride = b->src_stride;
- int rr = ref_mv->row, rc = ref_mv->col, br = rr, bc = rc, tr, tc;
+ int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
unsigned int besterr, thiserr = 0x7fffffff;
+ int k = -1, tk;
- if (rc < x->mv_col_min) bc = x->mv_col_min;
+ if (bc < x->mv_col_min) bc = x->mv_col_min;
- if (rc > x->mv_col_max) bc = x->mv_col_max;
+ if (bc > x->mv_col_max) bc = x->mv_col_max;
- if (rr < x->mv_row_min) br = x->mv_row_min;
+ if (br < x->mv_row_min) br = x->mv_row_min;
- if (rr > x->mv_row_max) br = x->mv_row_max;
+ if (br > x->mv_row_max) br = x->mv_row_max;
rr >>= 1;
rc >>= 1;
- br >>= 3;
- bc >>= 3;
besterr = ERR(br, bc, thiserr);
- // hex search jbb changed to 127 to avoid max 256 problem steping by 2.
- for (j = 0; j < 127; j++)
+ // hex search
+ // first hex iteration (j == 0), unrolled
+ tr = br;
+ tc = bc;
+
+ for (i = 0; i < 6; i++)
+ {
+ int nr = tr + hex[i].row, nc = tc + hex[i].col;
+
+ if (nc < x->mv_col_min) continue;
+
+ if (nc > x->mv_col_max) continue;
+
+ if (nr < x->mv_row_min) continue;
+
+ if (nr > x->mv_row_max) continue;
+
+ //CHECK_BETTER(thiserr,nr,nc);
+ if ((thiserr = ERR(nr, nc, besterr)) < besterr)
+ {
+ besterr = thiserr;
+ br = nr;
+ bc = nc;
+ k = i;
+ }
+ }
+
+ if (tr == br && tc == bc)
+ goto cal_neighbors;
+
+ for (j = 1; j < 127; j++)
{
tr = br;
tc = bc;
+ tk = k;
- for (i = 0; i < 6; i++)
+ for (i = 0; i < 3; i++)
{
- int nr = tr + hex[i].row, nc = tc + hex[i].col;
+ int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
if (nc < x->mv_col_min) continue;
@@ -818,7 +851,17 @@ int vp8_hex_search
if (nr > x->mv_row_max) continue;
- CHECK_BETTER(thiserr, nr, nc);
+ //CHECK_BETTER(thiserr,nr,nc);
+ if ((thiserr = ERR(nr, nc, besterr)) < besterr)
+ {
+ besterr = thiserr;
+ br = nr;
+ bc = nc; // k = (tk + 5 + i) % 6
+ k = tk + 5 + i;
+
+ if (k >= 12) k -= 12;
+ else if (k >= 6) k -= 6;
+ }
}
if (tr == br && tc == bc)
@@ -826,6 +869,7 @@ int vp8_hex_search
}
// check 8 1 away neighbors
+cal_neighbors:
tr = br;
tc = bc;
@@ -847,7 +891,7 @@ int vp8_hex_search
best_mv->row = br;
best_mv->col = bc;
- return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
+ return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
}
#undef MVC
#undef PRE
@@ -855,6 +899,8 @@ int vp8_hex_search
#undef DIST
#undef ERR
#undef CHECK_BETTER
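One detail of the refinement loop above worth calling out: the direction bookkeeping k = tk + 5 + i with the two conditional subtractions is a divide-free spelling of (tk + 5 + i) % 6, as the inline comment hints. A minimal standalone check of the equivalence (illustrative, not patch code):

#include <assert.h>
#include <stdio.h>

/* Branch-based wrap from the patch; equivalent to (tk + 5 + i) % 6
 * for tk in 0..5 and i in 0..2, where tk + 5 + i is at most 12. */
static int wrap_dir(int tk, int i)
{
    int k = tk + 5 + i;

    if (k >= 12)
        k -= 12;
    else if (k >= 6)
        k -= 6;

    return k;
}

int main(void)
{
    int tk, i;

    for (tk = 0; tk < 6; tk++)
        for (i = 0; i < 3; i++)
            assert(wrap_dir(tk, i) == (tk + 5 + i) % 6);

    printf("wrap_dir matches (tk + 5 + i) %% 6\n");
    return 0;
}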
+
+
int vp8_diamond_search_sad
(
MACROBLOCK *x,
@@ -996,7 +1042,7 @@ int vp8_diamond_search_sadx4
int tot_steps;
MV this_mv;
- unsigned int bestsad = UINT_MAX;
+ int bestsad = INT_MAX;
int best_site = 0;
int last_site = 0;
@@ -1034,84 +1080,73 @@ int vp8_diamond_search_sadx4
for (step = 0; step < tot_steps ; step++)
{
- int check_row_min, check_col_min, check_row_max, check_col_max;
+ int all_in = 1, t;
- check_row_min = x->mv_row_min - best_mv->row;
- check_row_max = x->mv_row_max - best_mv->row;
- check_col_min = x->mv_col_min - best_mv->col;
- check_col_max = x->mv_col_max - best_mv->col;
+ // To know whether all candidate points are within the bounds, four
+ // boundary checks are enough instead of checking four bounds for each point.
+ all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
+ all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
+ all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
+ all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
- for (j = 0 ; j < x->searches_per_step ; j += 4)
+ if (all_in)
{
- unsigned char *block_offset[4];
- unsigned int valid_block[4];
- int all_in = 1, t;
+ unsigned int sad_array[4];
- for (t = 0; t < 4; t++)
+ for (j = 0 ; j < x->searches_per_step ; j += 4)
{
- valid_block [t] = (ss[t+i].mv.col > check_col_min);
- valid_block [t] &= (ss[t+i].mv.col < check_col_max);
- valid_block [t] &= (ss[t+i].mv.row > check_row_min);
- valid_block [t] &= (ss[t+i].mv.row < check_row_max);
-
- all_in &= valid_block[t];
- block_offset[t] = ss[i+t].offset + best_address;
- }
+ unsigned char *block_offset[4];
- if (all_in)
- {
- unsigned int sad_array[4];
+ for (t = 0; t < 4; t++)
+ block_offset[t] = ss[i+t].offset + best_address;
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
for (t = 0; t < 4; t++, i++)
{
- thissad = sad_array[t];
-
- if (thissad < bestsad)
+ if (sad_array[t] < bestsad)
{
this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+ sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
- if (thissad < bestsad)
+ if (sad_array[t] < bestsad)
{
- bestsad = thissad;
+ bestsad = sad_array[t];
best_site = i;
}
}
}
}
- else
+ }
+ else
+ {
+ for (j = 0 ; j < x->searches_per_step ; j++)
{
- int t;
+ // Trap illegal vectors
+ this_row_offset = best_mv->row + ss[i].mv.row;
+ this_col_offset = best_mv->col + ss[i].mv.col;
- for (t = 0; t < 4; i++, t++)
+ if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+ (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
- // Trap illegal vectors
- if (valid_block[t])
+ check_here = ss[i].offset + best_address;
+ thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ if (thissad < bestsad)
{
- check_here = block_offset[t];
- thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+ this_mv.row = this_row_offset << 3;
+ this_mv.col = this_col_offset << 3;
+ thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
- this_row_offset = best_mv->row + ss[i].mv.row;
- this_col_offset = best_mv->col + ss[i].mv.col;
-
- this_mv.row = this_row_offset << 3;
- this_mv.col = this_col_offset << 3;
- thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
- if (thissad < bestsad)
- {
- bestsad = thissad;
- best_site = i;
- }
+ bestsad = thissad;
+ best_site = i;
}
}
}
+ i++;
}
}
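The all_in fast path in vp8_diamond_search_sadx4 above leans on the layout of the search-site list: within each group of four candidates, the first pair is assumed to carry the extreme row offsets and the second pair the extreme column offsets, so four comparisons bound the whole group before handing it to the 4-way SAD (sdx4df). A reduced sketch of the idea, assuming a simple diamond step pattern (names and values are illustrative):

#include <stdio.h>

typedef struct { int row, col; } mv_t;

/* One diamond step: ordered so entries 0/1 carry the extreme row
 * offsets and entries 2/3 the extreme column offsets. */
static const mv_t ss_step[4] = { {-4, 0}, {4, 0}, {0, -4}, {0, 4} };

/* Nonzero if every candidate (centre + ss_step[t]) lies strictly
 * inside the bounds: four checks instead of four per candidate. */
static int all_candidates_in(mv_t centre,
                             int row_min, int row_max,
                             int col_min, int col_max)
{
    int all_in = 1;
    all_in &= (centre.row + ss_step[0].row > row_min);
    all_in &= (centre.row + ss_step[1].row < row_max);
    all_in &= (centre.col + ss_step[2].col > col_min);
    all_in &= (centre.col + ss_step[3].col < col_max);
    return all_in;
}

int main(void)
{
    mv_t centre = { 10, 10 };
    printf("%d\n", all_candidates_in(centre, 0, 32, 0, 32));  /* 1 */
    printf("%d\n", all_candidates_in(centre, 8, 32, 0, 32));  /* 0 */
    return 0;
}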
@@ -1137,6 +1172,7 @@ int vp8_diamond_search_sadx4
}
+#if !(CONFIG_REALTIME_ONLY)
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
{
unsigned char *what = (*(b->base_src) + b->src);
@@ -1237,7 +1273,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
unsigned char *bestaddress;
MV *best_mv = &d->bmi.mv.as_mv;
MV this_mv;
- unsigned int bestsad = UINT_MAX;
+ int bestsad = INT_MAX;
int r, c;
unsigned char *check_here;
@@ -1287,7 +1323,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
check_here = r * mv_stride + in_what + col_min;
c = col_min;
- while ((c + 3) < col_max)
+ while ((c + 2) < col_max)
{
int i;
@@ -1349,6 +1385,160 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
else
return INT_MAX;
}
+#endif
+
+
+int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
+{
+ unsigned char *what = (*(b->base_src) + b->src);
+ int what_stride = b->src_stride;
+ unsigned char *in_what;
+ int in_what_stride = d->pre_stride;
+ int mv_stride = d->pre_stride;
+ unsigned char *bestaddress;
+ MV *best_mv = &d->bmi.mv.as_mv;
+ MV this_mv;
+ int bestsad = INT_MAX;
+ int r, c;
+
+ unsigned char *check_here;
+ unsigned int thissad;
+
+ int ref_row = ref_mv->row >> 3;
+ int ref_col = ref_mv->col >> 3;
+
+ int row_min = ref_row - distance;
+ int row_max = ref_row + distance;
+ int col_min = ref_col - distance;
+ int col_max = ref_col + distance;
+
+ unsigned short sad_array8[8];
+ unsigned int sad_array[3];
+
+ // Work out the mid point for the search
+ in_what = *(d->base_pre) + d->pre;
+ bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+
+ best_mv->row = ref_row;
+ best_mv->col = ref_col;
+
+ // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
+ if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
+ (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
+ {
+ // Baseline value at the centre
+ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+ }
+
+ // Apply further limits to prevent us using vectors that stretch beyond the UMV border
+ if (col_min < x->mv_col_min)
+ col_min = x->mv_col_min;
+
+ if (col_max > x->mv_col_max)
+ col_max = x->mv_col_max;
+
+ if (row_min < x->mv_row_min)
+ row_min = x->mv_row_min;
+
+ if (row_max > x->mv_row_max)
+ row_max = x->mv_row_max;
+
+ for (r = row_min; r < row_max ; r++)
+ {
+ this_mv.row = r << 3;
+ check_here = r * mv_stride + in_what + col_min;
+ c = col_min;
+
+ while ((c + 7) < col_max)
+ {
+ int i;
+
+ fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
+
+ for (i = 0; i < 8; i++)
+ {
+ thissad = (unsigned int)sad_array8[i];
+
+ if (thissad < bestsad)
+ {
+ this_mv.col = c << 3;
+ thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+ if (thissad < bestsad)
+ {
+ bestsad = thissad;
+ best_mv->row = r;
+ best_mv->col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+
+ while ((c + 2) < col_max)
+ {
+ int i;
+
+ fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
+
+ for (i = 0; i < 3; i++)
+ {
+ thissad = sad_array[i];
+
+ if (thissad < bestsad)
+ {
+ this_mv.col = c << 3;
+ thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+ if (thissad < bestsad)
+ {
+ bestsad = thissad;
+ best_mv->row = r;
+ best_mv->col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here++;
+ c++;
+ }
+ }
+
+ while (c < col_max)
+ {
+ thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+
+ if (thissad < bestsad)
+ {
+ this_mv.col = c << 3;
+ thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+ if (thissad < bestsad)
+ {
+ bestsad = thissad;
+ best_mv->row = r;
+ best_mv->col = c;
+ bestaddress = check_here;
+ }
+ }
+
+ check_here ++;
+ c ++;
+ }
+ }
+
+ this_mv.row = best_mv->row << 3;
+ this_mv.col = best_mv->col << 3;
+
+ if (bestsad < INT_MAX)
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
+ + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ else
+ return INT_MAX;
+}
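The new vp8_full_search_sadx8 walks each row of the search area in three tiers: an 8-wide SAD (sdx8f, filling sad_array8), a 3-wide SAD (sdx3f) for the remainder, and a scalar SAD (sdf) tail, re-checking each winning candidate with the motion-vector cost added before accepting it. The column-advance skeleton, reduced to a standalone toy (illustrative only):

#include <stdio.h>

int main(void)
{
    int c = 0, col_max = 21;

    while ((c + 7) < col_max)  /* 8-wide SIMD path (sdx8f) */
        c += 8;
    while ((c + 2) < col_max)  /* 3-wide path (sdx3f) */
        c += 3;
    while (c < col_max)        /* scalar tail (sdf) */
        c += 1;

    printf("covered %d columns\n", c);  /* prints 21 */
    return 0;
}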
#ifdef ENTROPY_STATS
void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 921206fec..7d6036248 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -41,14 +42,15 @@ extern int vp8_hex_search
int search_param,
int error_per_bit,
int *num00,
- vp8_variance_fn_t vf,
- vp8_sad_fn_t sf,
+ const vp8_variance_fn_ptr_t *vf,
int *mvsadcost[2],
int *mvcost[2]
);
-typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]);
+typedef int (fractional_mv_step_fp)
+ (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
+ int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
@@ -91,6 +93,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
typedef prototype_full_search_sad(*vp8_full_search_fn_t);
extern prototype_full_search_sad(vp8_full_search_sad);
extern prototype_full_search_sad(vp8_full_search_sadx3);
+extern prototype_full_search_sad(vp8_full_search_sadx8);
typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
extern prototype_diamond_search_sad(vp8_diamond_search_sad);
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index 73170cf52..d23c97e6e 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index 5ade26566..99ef119d5 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 56516fcab..5f02a5a02 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -20,7 +21,7 @@
#include "extend.h"
#include "ratectrl.h"
#include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
#include "g_common.h"
#include "vpx_scale/yv12extend.h"
#include "postproc.h"
@@ -28,6 +29,12 @@
#include "swapyv12buffer.h"
#include "threading.h"
#include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+#include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
+
#include <math.h>
#include <stdio.h>
#include <limits.h>
@@ -67,7 +74,7 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const
int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
-static void mode_ref_lf_test_function(VP8_COMP *cpi);
+static void set_default_lf_deltas(VP8_COMP *cpi);
extern const int vp8_gf_interval_table[101];
@@ -136,8 +143,6 @@ extern unsigned int inter_b_modes[15];
extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
@@ -146,6 +151,95 @@ extern const int qzbin_factors[129];
extern void vp8cx_init_quantizer(VP8_COMP *cpi);
extern const int vp8cx_base_skip_false_prob[128];
+// Tables relating active max Q to active min Q
+static const int kf_low_motion_minq[QINDEX_RANGE] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
+ 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10,10,
+ 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+ 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+ 27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38,
+};
+static const int kf_high_motion_minq[QINDEX_RANGE] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
+ 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10,
+ 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+ 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+ 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
+ 35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48,
+};
+/*static const int kf_minq[QINDEX_RANGE] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14,
+ 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
+ 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
+ 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38
+};*/
+static const int gf_low_motion_minq[QINDEX_RANGE] =
+{
+ 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
+ 3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,
+ 7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,
+ 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+ 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+ 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
+ 35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42,
+ 43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58
+};
+static const int gf_mid_motion_minq[QINDEX_RANGE] =
+{
+ 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
+ 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+ 9,10,10,10,10,11,11,11,12,12,12,12,13,13,13,14,
+ 14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,
+ 22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29,
+ 30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37,
+ 38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48,
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,
+};
+static const int gf_high_motion_minq[QINDEX_RANGE] =
+{
+ 0,0,0,0,1,1,1,1,1,2,2,2,3,3,3,4,
+ 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+ 9,10,10,10,11,11,12,12,13,13,14,14,15,15,16,16,
+ 17,17,18,18,19,19,20,20,21,21,22,22,23,23,24,24,
+ 25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32,
+ 33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40,
+ 41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
+ 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80,
+};
+/*static const int gf_arf_minq[QINDEX_RANGE] =
+{
+ 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
+ 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+ 9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14,
+ 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
+ 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
+ 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39,
+ 39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50,
+ 51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66
+};*/
+static const int inter_minq[QINDEX_RANGE] =
+{
+ 0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7,
+ 8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16,
+ 16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23,
+ 23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34,
+ 35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47,
+ 47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58,
+ 59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69,
+ 69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81,
+};
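These tables are QINDEX_RANGE-sized (128 entries each, matching VP8's quantizer index range) and let the rate-control code derive a floor for the active best quality from the active worst quality, with separate curves per frame type and motion level. A hypothetical lookup helper, purely for illustration (the patch presumably indexes the tables directly elsewhere in the encoder):

#include <stdio.h>

#define QINDEX_RANGE 128   /* matches the 128-entry tables above */

/* Hypothetical helper (not in the patch): clamp an active worst-case
 * quantizer index and look up the corresponding min-Q floor. */
static int lookup_min_q(const int table[QINDEX_RANGE], int active_worst_q)
{
    if (active_worst_q < 0)
        active_worst_q = 0;
    if (active_worst_q > QINDEX_RANGE - 1)
        active_worst_q = QINDEX_RANGE - 1;
    return table[active_worst_q];
}

int main(void)
{
    static const int toy_minq[QINDEX_RANGE] = { 0 };  /* all-zero stand-in */
    printf("%d\n", lookup_min_q(toy_minq, 500));      /* clamped to index 127 */
    return 0;
}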
void vp8_initialize()
{
@@ -179,9 +273,10 @@ static void setup_features(VP8_COMP *cpi)
cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
+ vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
+ vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
- // jbb trial !
- mode_ref_lf_test_function(cpi);
+ set_default_lf_deltas(cpi);
}
@@ -225,6 +320,19 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
vpx_free(cpi->tok);
cpi->tok = 0;
+ // Structure used to monitor GF usage
+ if (cpi->gf_active_flags != 0)
+ vpx_free(cpi->gf_active_flags);
+
+ cpi->gf_active_flags = 0;
+
+ if(cpi->mb.pip)
+ vpx_free(cpi->mb.pip);
+
+ cpi->mb.pip = 0;
+
+ vpx_free(cpi->total_stats);
+ vpx_free(cpi->this_frame_stats);
}
static void enable_segmentation(VP8_PTR ptr)
@@ -428,7 +536,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
}
-static void mode_ref_lf_test_function(VP8_COMP *cpi)
+static void set_default_lf_deltas(VP8_COMP *cpi)
{
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
@@ -544,7 +652,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_NEWG ] = INT_MAX;
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
}
- else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+ if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
{
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
@@ -556,7 +665,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
break;
case 1:
case 3:
- sf->optimize_coefficients = 0;
sf->thresh_mult[THR_NEARESTMV] = 0;
sf->thresh_mult[THR_ZEROMV ] = 0;
sf->thresh_mult[THR_DC ] = 0;
@@ -596,7 +704,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
}
- else if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
+
+ if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
{
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
@@ -604,7 +713,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_NEWG ] = INT_MAX;
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
}
- else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+ if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
{
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
@@ -615,6 +725,9 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (Speed > 0)
{
+ // Disable coefficient optimization above speed 0
+ sf->optimize_coefficients = 0;
+
cpi->mode_check_freq[THR_SPLITG] = 4;
cpi->mode_check_freq[THR_SPLITA] = 4;
cpi->mode_check_freq[THR_SPLITMV] = 2;
@@ -762,7 +875,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
cpi->mode_check_freq[THR_NEWA] = 4;
}
- if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_GOLD_FLAG)
+ if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
{
sf->thresh_mult[THR_NEARESTG ] = 2000;
sf->thresh_mult[THR_ZEROG ] = 2000;
@@ -770,7 +883,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_NEWG ] = 4000;
}
- if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_ALT_FLAG)
+ if (cpi->ref_frame_flags & VP8_ALT_FLAG)
{
sf->thresh_mult[THR_NEARESTA ] = 2000;
sf->thresh_mult[THR_ZEROA ] = 2000;
@@ -810,7 +923,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->full_freq[1] = 31;
sf->search_method = NSTEP;
- if (!cpi->ref_frame_flags & VP8_LAST_FLAG)
+ if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
{
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
@@ -819,7 +932,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
}
- if (!cpi->ref_frame_flags & VP8_GOLD_FLAG)
+ if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
{
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
@@ -828,7 +941,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
}
- if (!cpi->ref_frame_flags & VP8_ALT_FLAG)
+ if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
{
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
@@ -1121,40 +1234,33 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (cpi->sf.search_method == NSTEP)
{
- vp8_init3smotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+ vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
}
else if (cpi->sf.search_method == DIAMOND)
{
- vp8_init_dsmotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+ vp8_init_dsmotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
}
if (cpi->sf.improved_dct)
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
- cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
- cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
}
else
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
- cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
- cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
}
- cpi->mb.vp8_short_fdct4x4_ptr = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
if (cpi->sf.improved_quant)
{
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
- cpi->mb.quantize_brd = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
}
else
{
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
- cpi->mb.quantize_brd = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
}
#if CONFIG_RUNTIME_CPU_DETECT
@@ -1179,7 +1285,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
}
if (cpi->sf.optimize_coefficients == 1)
- cpi->mb.optimize = 1;
+ cpi->mb.optimize = 1 + cpi->is_next_src_alt_ref;
else
cpi->mb.optimize = 0;
@@ -1220,6 +1326,20 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi)
cpi->source_buffer_count = 0;
}
+
+static int vp8_alloc_partition_data(VP8_COMP *cpi)
+{
+ cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
+ (cpi->common.mb_rows + 1),
+ sizeof(PARTITION_INFO));
+ if(!cpi->mb.pip)
+ return ALLOC_FAILURE;
+
+ cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1;
+
+ return 0;
+}
+
void vp8_alloc_compressor_data(VP8_COMP *cpi)
{
VP8_COMMON *cm = & cpi->common;
@@ -1231,6 +1351,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
+ if (vp8_alloc_partition_data(cpi))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate partition data");
+
+
if ((width & 0xf) != 0)
width += 16 - (width & 0xf);
@@ -1261,6 +1386,21 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
cpi->inter_zz_count = 0;
cpi->gf_bad_count = 0;
cpi->gf_update_recommended = 0;
+
+
+ // Structures used to monitor GF usage
+ if (cpi->gf_active_flags != 0)
+ vpx_free(cpi->gf_active_flags);
+
+ CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+
+ cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+
+ cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+ cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+ if(!cpi->total_stats || !cpi->this_frame_stats)
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate firstpass stats");
}
@@ -1289,16 +1429,14 @@ int vp8_reverse_trans(int x)
};
void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
{
+ if(framerate < .1)
+ framerate = 30;
+
cpi->oxcf.frame_rate = framerate;
cpi->output_frame_rate = cpi->oxcf.frame_rate;
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
- cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
- cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
-
- cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
- cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2;
//cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1;
@@ -1308,14 +1446,26 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
cpi->max_gf_interval = 12;
- // Special conditions when altr ref frame enabled
- if (cpi->oxcf.play_alternate)
+ // Special conditions when the alt ref frame is enabled in lagged compress mode
+ if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
{
if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
}
}
+
+static int
+rescale(int val, int num, int denom)
+{
+ int64_t llnum = num;
+ int64_t llden = denom;
+ int64_t llval = val;
+
+ return llval * llnum / llden;
+}
+
+
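The rescale() helper above exists because the buffer-level options are now expressed in milliseconds (note the 4 -> 4000 style changes elsewhere in this patch) and must be scaled by the target bandwidth: a plain 32-bit val * num overflows for realistic bitrates, so the multiply is widened to 64 bits first. A quick standalone illustration with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int val = 240000;       /* maximum_buffer_size, now in milliseconds */
    int num = 20000000;     /* e.g. a 20 Mbit/s target bandwidth */
    int denom = 1000;

    /* A 32-bit multiply of val * num wraps; widening first, as
     * rescale() does, keeps the intermediate exact. (The patch's
     * rescale() still narrows its result back to int on return.) */
    int64_t wide = (int64_t)val * num / denom;

    printf("%lld bits\n", (long long)wide);   /* 4800000000 */
    return 0;
}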
void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
{
VP8_COMP *cpi = (VP8_COMP *)(ptr);
@@ -1343,9 +1493,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->oxcf.worst_allowed_q = MAXQ;
cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER;
- cpi->oxcf.starting_buffer_level = 4;
- cpi->oxcf.optimal_buffer_level = 5;
- cpi->oxcf.maximum_buffer_size = 6;
+ cpi->oxcf.starting_buffer_level = 4000;
+ cpi->oxcf.optimal_buffer_level = 5000;
+ cpi->oxcf.maximum_buffer_size = 6000;
cpi->oxcf.under_shoot_pct = 90;
cpi->oxcf.allow_df = 0;
cpi->oxcf.drop_frames_water_mark = 20;
@@ -1494,26 +1644,32 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
// local file playback mode == really big buffer
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
{
- cpi->oxcf.starting_buffer_level = 60;
- cpi->oxcf.optimal_buffer_level = 60;
- cpi->oxcf.maximum_buffer_size = 240;
+ cpi->oxcf.starting_buffer_level = 60000;
+ cpi->oxcf.optimal_buffer_level = 60000;
+ cpi->oxcf.maximum_buffer_size = 240000;
}
// Convert target bandwidth from Kbit/s to Bit/s
cpi->oxcf.target_bandwidth *= 1000;
- cpi->oxcf.starting_buffer_level *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.starting_buffer_level =
+ rescale(cpi->oxcf.starting_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.optimal_buffer_level == 0)
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.optimal_buffer_level =
+ rescale(cpi->oxcf.optimal_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.maximum_buffer_size == 0)
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.maximum_buffer_size *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.maximum_buffer_size =
+ rescale(cpi->oxcf.maximum_buffer_size,
+ cpi->oxcf.target_bandwidth, 1000);
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
@@ -1526,6 +1682,10 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+ cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
+ cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
+ cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
+ cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->total_actual_bits = 0;
cpi->total_target_vs_actual = 0;
@@ -1569,9 +1729,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
}
- if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
- ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
- cm->last_frame.y_width == 0)
+ if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+ cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
alloc_raw_frame_buffers(cpi);
vp8_alloc_compressor_data(cpi);
@@ -1598,16 +1758,10 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
- // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
- if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
- {
- cpi->oxcf.play_alternate = 0;
- cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
- }
-
// YX Temp
cpi->last_alt_ref_sei = -1;
cpi->is_src_frame_alt_ref = 0;
+ cpi->is_next_src_alt_ref = 0;
#if 0
// Experimental RD Code
@@ -1616,13 +1770,16 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
#endif
#if VP8_TEMPORAL_ALT_REF
+
+ cpi->use_weighted_temporal_filter = 0;
+
{
int i;
cpi->fixed_divide[0] = 0;
- for (i = 1; i < 255; i++)
- cpi->fixed_divide[i] = 0x10000 / i;
+ for (i = 1; i < 512; i++)
+ cpi->fixed_divide[i] = 0x80000 / i;
}
#endif
}
@@ -1773,26 +1930,32 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
// local file playback mode == really big buffer
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
{
- cpi->oxcf.starting_buffer_level = 60;
- cpi->oxcf.optimal_buffer_level = 60;
- cpi->oxcf.maximum_buffer_size = 240;
+ cpi->oxcf.starting_buffer_level = 60000;
+ cpi->oxcf.optimal_buffer_level = 60000;
+ cpi->oxcf.maximum_buffer_size = 240000;
}
// Convert target bandwidth from Kbit/s to Bit/s
cpi->oxcf.target_bandwidth *= 1000;
- cpi->oxcf.starting_buffer_level *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.starting_buffer_level =
+ rescale(cpi->oxcf.starting_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.optimal_buffer_level == 0)
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.optimal_buffer_level =
+ rescale(cpi->oxcf.optimal_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.maximum_buffer_size == 0)
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.maximum_buffer_size *= cpi->oxcf.target_bandwidth;
+ cpi->oxcf.maximum_buffer_size =
+ rescale(cpi->oxcf.maximum_buffer_size,
+ cpi->oxcf.target_bandwidth, 1000);
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
@@ -1805,6 +1968,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
+ cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
+ cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
+ cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
+ cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->total_actual_bits = 0;
cpi->total_target_vs_actual = 0;
@@ -1848,9 +2015,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
}
- if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
- ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
- cm->last_frame.y_width == 0)
+ if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+ cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
alloc_raw_frame_buffers(cpi);
vp8_alloc_compressor_data(cpi);
@@ -1877,16 +2044,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
- // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
- if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
- {
- cpi->oxcf.play_alternate = 0;
- cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
- }
-
// YX Temp
cpi->last_alt_ref_sei = -1;
cpi->is_src_frame_alt_ref = 0;
+ cpi->is_next_src_alt_ref = 0;
#if 0
// Experimental RD Code
@@ -1924,7 +2085,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
VP8_COMP *cpi;
VP8_PTR ptr;
} ctx;
-
+
VP8_COMP *cpi;
VP8_COMMON *cm;
@@ -1951,8 +2112,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
- vp8_cmachine_specific_config(cpi);
vp8_create_common(&cpi->common);
+ vp8_cmachine_specific_config(cpi);
vp8_init_config((VP8_PTR)cpi, oxcf);
@@ -1993,7 +2154,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->active_map_enabled = 0;
// Create the first pass motion map structure and set to 0
- CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(cpi->common.MBs, 1));
+ // Allocate space for maximum of 15 buffers
+ CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
#if 0
// Experimental code for lagged and one pass
@@ -2035,19 +2197,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
// Test function for segmentation
//segmentation_test_function((VP8_PTR) cpi);
- // Loop filter mode / ref deltas test function
- //mode_ref_lf_test_function(cpi);
-
#ifdef ENTROPY_STATS
init_context_counters();
#endif
-#ifdef INTRARDOPT
- cpi->intra_rd_opt = 1;
-
-#endif
-
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2148,10 +2302,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
}
else if (cpi->pass == 2)
{
+ size_t packet_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+ int packets = oxcf->two_pass_stats_in.sz / packet_sz;
+
cpi->stats_in = oxcf->two_pass_stats_in.buf;
- cpi->stats_in_end = cpi->stats_in
- + oxcf->two_pass_stats_in.sz / sizeof(FIRSTPASS_STATS)
- - 1;
+ cpi->stats_in_end = (void*)((char *)cpi->stats_in
+ + (packets - 1) * packet_sz);
vp8_init_second_pass(cpi);
}
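The two-pass setup now sizes first-pass stats packets with vp8_firstpass_stats_sz() instead of sizeof(FIRSTPASS_STATS), and points stats_in_end at the start of the last whole packet rather than one element back. A sketch of the pointer arithmetic; the helper's size formula below is invented for illustration:

#include <stddef.h>
#include <stdio.h>

/* Invented stand-in: the real vp8_firstpass_stats_sz() derives the
 * packet size from the macroblock count; this formula is made up. */
static size_t firstpass_stats_sz(int mbs)
{
    return 64 + (size_t)mbs;
}

int main(void)
{
    char buf[10 * (64 + 300)];     /* pretend buffer: 10 packets, 300 MBs */
    size_t packet_sz = firstpass_stats_sz(300);
    int packets = (int)(sizeof(buf) / packet_sz);

    void *stats_in = buf;
    void *stats_in_end = (char *)stats_in + (packets - 1) * packet_sz;

    printf("%d packets, last packet starts at offset %ld\n",
           packets, (long)((char *)stats_in_end - (char *)stats_in));
    return 0;
}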
@@ -2178,11 +2334,55 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
vp8cx_create_encoder_threads(cpi);
- cpi->fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
- cpi->fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
- cpi->fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
- cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
- cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+ cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
+ cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
+ cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
+ cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
+ cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
+ cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
+ cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+ cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
+ cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+
+ cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
+ cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
+ cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
+ cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
+ cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
+ cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
+ cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+ cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
+ cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
+
+ cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
+ cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
+ cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
+ cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
+ cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
+ cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
+ cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+ cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
+ cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
+
+ cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
+ cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
+ cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
+ cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
+ cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
+ cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
+ cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+ cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
+ cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
+
+ cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
+ cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
+ cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
+ cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
+ cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
+ cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
+ cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+ cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
+ cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
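The former single fn_ptr bundle becomes a table indexed by block size, so the motion-search routines rewritten earlier in this patch (which now take a const vp8_variance_fn_ptr_t *) stay agnostic of the partition; only BLOCK_16X16 supplies the half-pel shortcut variants, the rest leave them NULL. A toy dispatch sketch (the NULL fallback here is illustrative; in the patch, callers only take the half-pel path for block sizes that provide it):

#include <stdio.h>

typedef unsigned int (*svf_fn)(int offset);   /* toy signature */

static unsigned int generic_svf(int offset)   { return 100u + offset; }
static unsigned int halfpix_h_svf(int offset) { return 50u + offset; }

typedef struct { svf_fn svf; svf_fn svf_halfpix_h; } fn_ptr_t;

int main(void)
{
    fn_ptr_t table[2] = {
        { generic_svf, halfpix_h_svf },  /* like BLOCK_16X16 */
        { generic_svf, NULL }            /* like BLOCK_8X8   */
    };
    int b;

    for (b = 0; b < 2; b++)
    {
        /* Use the half-pel shortcut when present, else the generic
         * sub-pixel variance. */
        svf_fn f = table[b].svf_halfpix_h ? table[b].svf_halfpix_h
                                          : table[b].svf;
        printf("block %d -> %u\n", b, f(1));
    }
    return 0;
}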
#if !(CONFIG_REALTIME_ONLY)
cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
@@ -2246,7 +2446,8 @@ void vp8_remove_compressor(VP8_PTR *ptr)
if (cpi->b_calculate_psnr)
{
- double samples = 3.0 / 2 * cpi->count * cpi->common.last_frame.y_width * cpi->common.last_frame.y_height;
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+ double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
double total_psnr = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error);
double total_psnr2 = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error2);
double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
@@ -2375,6 +2576,7 @@ void vp8_remove_compressor(VP8_PTR *ptr)
}
fprintf(fmode, "};\n");
+ fclose(fmode);
}
#endif
@@ -2585,19 +2787,19 @@ int vp8_get_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF
{
VP8_COMP *cpi = (VP8_COMP *)(ptr);
VP8_COMMON *cm = &cpi->common;
+ int ref_fb_idx;
if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
return 0;
}
int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
@@ -2605,18 +2807,19 @@ int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF
VP8_COMP *cpi = (VP8_COMP *)(ptr);
VP8_COMMON *cm = &cpi->common;
- if (ref_frame_flag == VP8_LAST_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
+ int ref_fb_idx;
+ if (ref_frame_flag == VP8_LAST_FLAG)
+ ref_fb_idx = cm->lst_fb_idx;
else if (ref_frame_flag == VP8_GOLD_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+ ref_fb_idx = cm->gld_fb_idx;
else if (ref_frame_flag == VP8_ALT_FLAG)
- vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+ ref_fb_idx = cm->alt_fb_idx;
else
return -1;
+ vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
return 0;
}
int vp8_update_entropy(VP8_PTR comp, int update)
@@ -2628,6 +2831,8 @@ int vp8_update_entropy(VP8_PTR comp, int update)
return 0;
}
+
+#if OUTPUT_YUV_SRC
void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s)
{
FILE *yuv_file = fopen(name, "ab");
@@ -2663,6 +2868,8 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s)
fclose(yuv_file);
}
+#endif
+
static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
@@ -2691,14 +2898,25 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
#endif
}
// we may need to copy to a buffer so we can extend the image...
- else if (cm->Width != cm->last_frame.y_width ||
- cm->Height != cm->last_frame.y_height)
+ else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height)
{
//vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
#if HAVE_ARMV7
- vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
- vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+ }
#endif
cpi->Source = &cpi->scaled_source;
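This nested guard recurs throughout the change. Depending on the build configuration it reduces to one of three shapes, sketched here for the copy above:

/* 1) HAVE_ARMV7 && !CONFIG_RUNTIME_CPU_DETECT -- NEON unconditionally:
 *        vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
 *
 * 2) HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT -- runtime dispatch:
 *        if (cm->rtcd.flags & HAS_NEON)
 *            vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
 *        else
 *            vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 *
 * 3) !HAVE_ARMV7 -- portable C path only:
 *        vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 */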
@@ -2782,23 +3000,17 @@ static int pick_frame_size(VP8_COMP *cpi)
cm->frame_type = KEY_FRAME;
}
- // Auto key frames (Only two pass will enter here)
- else if (cm->frame_type == KEY_FRAME)
+ // Special case for forced key frames
+ // The frame sizing here is still far from ideal for 2 pass.
+ else if (cm->frame_flags & FRAMEFLAGS_KEY)
{
- vp8_calc_auto_iframe_target_size(cpi);
- }
- // Forced key frames (by interval or an external signal)
- else if ((cm->frame_flags & FRAMEFLAGS_KEY) ||
- (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0)))
- {
- // Key frame from VFW/auto-keyframe/first frame
cm->frame_type = KEY_FRAME;
-
resize_key_frame(cpi);
-
- // Compute target frame size
- if (cpi->pass != 2)
- vp8_calc_iframe_target_size(cpi);
+ vp8_calc_iframe_target_size(cpi);
+ }
+ else if (cm->frame_type == KEY_FRAME)
+ {
+ vp8_calc_auto_iframe_target_size(cpi);
}
else
{
@@ -2845,7 +3057,7 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
VP8_COMMON *cm = &cpi->common;
// Update the alternate reference frame buffer
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
// Select an interval before next GF or altref
if (!cpi->auto_gold)
@@ -2865,8 +3077,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
}
// Update data structure that monitors level of reference to last GF
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+ cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
// this frame's refresh means later frames don't refresh unless the user specifies otherwise
cpi->common.frames_since_golden = 0;
@@ -2887,7 +3099,7 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
if (cm->refresh_golden_frame)
{
// Update the golden frame buffer
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
// Select an interval before next GF
if (!cpi->auto_gold)
@@ -2913,8 +3125,8 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
}
// Update data structure that monitors level of reference to last GF
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+ cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
// this frame's refresh means later frames don't refresh unless the user specifies otherwise
cm->refresh_golden_frame = 0;
@@ -3220,291 +3432,14 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
#endif
// return of 0 means drop frame
-#if VP8_TEMPORAL_ALT_REF
-static void vp8cx_temp_blur1_c
+static void encode_frame_to_data_rate
(
- unsigned char **frames,
- int frame_count,
- unsigned char *src,
- unsigned char *dst,
- int width,
- int stride,
- int height,
- int strength,
- int *fixed_divide,
- unsigned char *motion_map_ptr,
- unsigned char block_size
+ VP8_COMP *cpi,
+ unsigned long *size,
+ unsigned char *dest,
+ unsigned int *frame_flags
)
{
- int byte = 0; // Buffer offset for the current pixel value being filtered
- int frame = 0;
- int modifier = 0;
- int i, j, k;
- int block_ofset;
- int Cols, Rows;
- unsigned char Shift = (block_size == 16) ? 4 : 3;
-
- Cols = width / block_size;
- Rows = height / block_size;
-
- for (i = 0; i < height; i++)
- {
- block_ofset = (i >> Shift) * Cols;
-
- for (j = 0; j < Cols; j ++)
- {
- if (motion_map_ptr[block_ofset] > 2)
- {
- vpx_memcpy(&dst[byte], &src[byte], block_size);
- byte += block_size;
- }
- else
- {
- for (k = 0; k < block_size; k++)
- {
- int accumulator = 0;
- int count = 0;
- int src_byte = src[byte];
-
- for (frame = 0; frame < frame_count; frame++)
- {
- // get current frame pixel value
- int pixel_value = frames[frame][byte]; // int pixel_value = *frameptr;
-
- modifier = src_byte; // modifier = s[byte];
- modifier -= pixel_value;
- modifier *= modifier;
- modifier >>= strength;
- modifier *= 3;
-
- if (modifier > 16)
- modifier = 16;
-
- modifier = 16 - modifier;
-
- accumulator += modifier * pixel_value;
-
- count += modifier;
- }
-
- accumulator += (count >> 1);
- accumulator *= fixed_divide[count]; // accumulator *= ppi->fixed_divide[count];
- accumulator >>= 16;
-
- dst[byte] = accumulator; // d[byte] = accumulator;
-
- // move to next pixel
- byte++;
- }
- }
-
- block_ofset++;
- }
-
- // Step byte on over the UMV border to the start of the next line
- byte += stride - width;
- }
-}
-
-static void vp8cx_temp_filter_c
-(
- VP8_COMP *cpi
-)
-{
- YV12_BUFFER_CONFIG *temp_source_buffer;
- int *fixed_divide = cpi->fixed_divide;
-
- int frame = 0;
- int max_frames = 11;
-
- int num_frames_backward = 0;
- int num_frames_forward = 0;
- int frames_to_blur_backward = 0;
- int frames_to_blur_forward = 0;
- int frames_to_blur = 0;
- int start_frame = 0;
-
- int strength = cpi->oxcf.arnr_strength;
-
- int blur_type = cpi->oxcf.arnr_type;
-
- int new_max_frames = cpi->oxcf.arnr_max_frames;
-
- if (new_max_frames > 0)
- max_frames = new_max_frames;
-
- num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
-
- if (num_frames_backward < 0)
- num_frames_backward += cpi->oxcf.lag_in_frames;
-
- num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
-
- switch (blur_type)
- {
- case 1:
- /////////////////////////////////////////
- // Backward Blur
-
- frames_to_blur_backward = num_frames_backward;
-
- if (frames_to_blur_backward >= max_frames)
- frames_to_blur_backward = max_frames - 1;
-
- frames_to_blur = frames_to_blur_backward + 1;
- break;
-
- case 2:
- /////////////////////////////////////////
- // Forward Blur
-
- frames_to_blur_forward = num_frames_forward;
-
- if (frames_to_blur_forward >= max_frames)
- frames_to_blur_forward = max_frames - 1;
-
- frames_to_blur = frames_to_blur_forward + 1;
- break;
-
- case 3:
- /////////////////////////////////////////
- // Center Blur
- frames_to_blur_forward = num_frames_forward;
- frames_to_blur_backward = num_frames_backward;
-
- if (frames_to_blur_forward > frames_to_blur_backward)
- frames_to_blur_forward = frames_to_blur_backward;
-
- if (frames_to_blur_backward > frames_to_blur_forward)
- frames_to_blur_backward = frames_to_blur_forward;
-
- if (frames_to_blur_forward > (max_frames / 2))
- frames_to_blur_forward = (max_frames / 2);
-
- if (frames_to_blur_backward > (max_frames / 2))
- frames_to_blur_backward = (max_frames / 2);
-
- frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
- break;
-
- default:
- /////////////////////////////////////////
- // At most 4 frames forward Blur
- frames_to_blur_forward = 4;
- frames_to_blur_backward = num_frames_backward;
-
- if (max_frames > 5)
- {
- if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames)
- {
- frames_to_blur_backward = max_frames - frames_to_blur_forward - 1;
- }
- }
- else
- {
- frames_to_blur_forward = max_frames - 1;
- frames_to_blur_backward = 0;
- }
-
- frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
- break;
- }
-
- start_frame = (cpi->last_alt_ref_sei + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
-
-#ifdef DEBUGFWG
- // DEBUG FWG
- printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
- , max_frames
- , num_frames_backward
- , num_frames_forward
- , frames_to_blur
- , frames_to_blur_backward
- , frames_to_blur_forward
- , cpi->source_encode_index
- , cpi->last_alt_ref_sei
- , start_frame);
-#endif
-
- for (frame = 0; frame < frames_to_blur; frame++)
- {
- int which_buffer = start_frame - frame;
-
- if (which_buffer < 0)
- which_buffer += cpi->oxcf.lag_in_frames;
-
- cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.y_buffer;
- }
-
- temp_source_buffer = &cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer;
-
- // Blur Y
- vp8cx_temp_blur1_c(
- cpi->frames,
- frames_to_blur,
- temp_source_buffer->y_buffer, // cpi->Source->y_buffer,
- cpi->alt_ref_buffer.source_buffer.y_buffer, // cpi->Source->y_buffer,
- temp_source_buffer->y_width,
- temp_source_buffer->y_stride,
- temp_source_buffer->y_height,
- //temp_source_buffer->y_height * temp_source_buffer->y_stride,
- strength,
- fixed_divide,
- cpi->fp_motion_map, 16);
-
- for (frame = 0; frame < frames_to_blur; frame++)
- {
- int which_buffer = cpi->last_alt_ref_sei - frame;
-
- if (which_buffer < 0)
- which_buffer += cpi->oxcf.lag_in_frames;
-
- cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.u_buffer;
- }
-
- // Blur U
- vp8cx_temp_blur1_c(
- cpi->frames,
- frames_to_blur,
- temp_source_buffer->u_buffer,
- cpi->alt_ref_buffer.source_buffer.u_buffer, // cpi->Source->u_buffer,
- temp_source_buffer->uv_width,
- temp_source_buffer->uv_stride,
- temp_source_buffer->uv_height,
- //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
- strength,
- fixed_divide,
- cpi->fp_motion_map, 8);
-
- for (frame = 0; frame < frames_to_blur; frame++)
- {
- int which_buffer = cpi->last_alt_ref_sei - frame;
-
- if (which_buffer < 0)
- which_buffer += cpi->oxcf.lag_in_frames;
-
- cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.v_buffer;
- }
-
- // Blur V
- vp8cx_temp_blur1_c(
- cpi->frames,
- frames_to_blur,
- temp_source_buffer->v_buffer,
- cpi->alt_ref_buffer.source_buffer.v_buffer, // cpi->Source->v_buffer,
- temp_source_buffer->uv_width,
- temp_source_buffer->uv_stride,
- //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
- temp_source_buffer->uv_height,
- strength,
- fixed_divide,
- cpi->fp_motion_map, 8);
-}
-#endif
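The deleted vp8cx_temp_blur1_c weighted each co-located pixel across the lagged frames by a similarity kernel -- weight 16 for an identical pixel, falling off with the squared difference scaled down by the arnr strength -- then normalised by the weight sum with a fixed-point reciprocal. The per-pixel step, condensed from the removed body (this assumes fixed_divide[n] holds (1 << 16) / n, which is what the multiply-and-shift implies):

/* Sketch of one output pixel of the removed temporal blur. */
static unsigned char temporal_blend_sketch(unsigned char **frames,
                                           int frame_count, int src_byte,
                                           int byte, int strength,
                                           const int *fixed_divide)
{
    int accumulator = 0, count = 0, frame;

    for (frame = 0; frame < frame_count; frame++)
    {
        int pixel_value = frames[frame][byte];
        int modifier = src_byte - pixel_value;

        modifier = (modifier * modifier) >> strength;
        modifier *= 3;

        if (modifier > 16)
            modifier = 16;

        modifier = 16 - modifier;   /* 16 = identical, 0 = too different */

        accumulator += modifier * pixel_value;
        count += modifier;
    }

    /* count >= 16 because the source frame is among `frames` */
    accumulator += count >> 1;                           /* rounding */
    accumulator = (accumulator * fixed_divide[count]) >> 16;
    return (unsigned char)accumulator;
}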
-
-
-static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags)
-{
int Q;
int frame_over_shoot_limit;
int frame_under_shoot_limit;
@@ -3588,6 +3523,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
{
int i;
+ // Reset the loop filter deltas and segmentation map
+ setup_features(cpi);
+
// If segmentation is enabled force a map update for key frames
if (cpi->mb.e_mbd.segmentation_enabled)
{
@@ -3595,12 +3533,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
}
- // If mode or reference frame based loop filter deltas are enabled then force an update for key frames.
- if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled)
- {
- cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
- }
-
// The alternate reference frame cannot be active for a key frame
cpi->source_alt_ref_active = FALSE;
@@ -3753,87 +3685,49 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
// Set an active best quality and if necessary active worst quality
if (cpi->pass == 2 || (cm->current_video_frame > 150))
{
- //if ( (cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame )
int Q;
int i;
int bpm_target;
+ //int tmp;
+
+ vp8_clear_system_state();
Q = cpi->active_worst_quality;
if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
{
- vp8_clear_system_state();
-
if (cm->frame_type != KEY_FRAME)
{
- // Where a gf overlays an existing arf then allow active max Q to drift to highest allowed value.
- //if ( cpi->common.refresh_golden_frame && cpi->source_alt_ref_active )
- //cpi->active_worst_quality = cpi->worst_quality;
-
if (cpi->avg_frame_qindex < cpi->active_worst_quality)
Q = cpi->avg_frame_qindex;
- if (cpi->section_is_low_motion)
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 3 / 2) + 128)) / 64;
- else if (cpi->section_is_fast_motion)
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 128)) / 64;
+ if ( cpi->gfu_boost > 1000 )
+ cpi->active_best_quality = gf_low_motion_minq[Q];
+ else if ( cpi->gfu_boost < 400 )
+ cpi->active_best_quality = gf_high_motion_minq[Q];
else
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 5 / 4) + 128)) / 64;
- }
- // KEY FRAMES
- else
- {
- if (cpi->section_is_low_motion)
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 240)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127
- else
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64;
- }
-
- for (i = Q; i > 0; i--)
- {
- if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
- break;
- }
-
- cpi->active_best_quality = i;
-
- // this entire section could be replaced by a look up table
-#if 0
- {
- int Q, best_q[128];
-
- for (Q = 0; Q < 128; Q++)
- {
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127
-
- for (i = Q; i > 0; i--)
- {
- if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
- break;
- }
-
- best_q[Q] = i;
- }
-
- Q += 0;
- }
-#endif
-
+ cpi->active_best_quality = gf_mid_motion_minq[Q];
+
+ /*cpi->active_best_quality = gf_arf_minq[Q];
+ tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400;
+ //tmp = (cpi->gfu_boost > 1000) ? 600 :
+ //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400;
+ tmp = 128 - (tmp >> 4);
+ cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/
+
+ }
+ // KEY FRAMES
+ else
+ {
+ if (cpi->gfu_boost > 600)
+ cpi->active_best_quality = kf_low_motion_minq[Q];
+ else
+ cpi->active_best_quality = kf_high_motion_minq[Q];
+ }
}
else
{
- vp8_clear_system_state();
-
- //bpm_target = (vp8_bits_per_mb[cm->frame_type][Q]*(Q+128))/64; // Approx 2 to 4 where Q has the range 0-127
- bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 192)) / 128; // Approx * 1.5 to 2.5 where Q has range 0-127
-
- for (i = Q; i > 0; i--)
- {
- if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
- break;
- }
-
- cpi->active_best_quality = i;
+ cpi->active_best_quality = inter_minq[Q];
}
// Under CBR, if the buffer is sufficiently full it is reasonable to allow higher quality on the frames
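The open-coded search over vp8_bits_per_mb gives way to precomputed minimum-quantizer tables (kf_*, gf_*, inter_minq), indexed by the working Q with the motion class chosen from gfu_boost thresholds. Condensed, the new selection is (table names as above, assumed to be QINDEX_RANGE-entry arrays built elsewhere):

static int pick_active_best_q_sketch(int Q, int is_key_frame,
                                     int is_gf_or_arf, int gfu_boost)
{
    if (is_key_frame)
        return (gfu_boost > 600) ? kf_low_motion_minq[Q]
                                 : kf_high_motion_minq[Q];

    if (is_gf_or_arf)
    {
        if (gfu_boost > 1000)
            return gf_low_motion_minq[Q];
        if (gfu_boost < 400)
            return gf_high_motion_minq[Q];
        return gf_mid_motion_minq[Q];
    }

    return inter_minq[Q];
}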
@@ -4060,6 +3954,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
// Clear the Alt reference frame active flag when we have a key frame
cpi->source_alt_ref_active = FALSE;
+ // Reset the loop filter deltas and segmentation map
+ setup_features(cpi);
+
// If segmentation is enabled force a map update for key frames
if (cpi->mb.e_mbd.segmentation_enabled)
{
@@ -4067,12 +3964,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
}
- // If mode or reference frame based loop filter deltas are enabled then force an update for key frames.
- if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled)
- {
- cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
- }
-
vp8_restore_coding_context(cpi);
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
@@ -4276,17 +4167,18 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
// Update the GF useage maps.
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
- vp8_update_gf_useage_maps(cm, &cpi->mb.e_mbd);
+ vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
if (cm->frame_type == KEY_FRAME)
cm->refresh_last_frame = 1;
- if (0)
+#if 0
{
FILE *f = fopen("gfactive.stt", "a");
- fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+ fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
fclose(f);
}
+#endif
// For inter frames the current default behaviour is that when cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer
// This is purely an encoder decision at present.
@@ -4297,11 +4189,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
if (cm->refresh_last_frame)
{
- vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
- cm->frame_to_show = &cm->last_frame;
+ vp8_swap_yv12_buffer(&cm->yv12_fb[cm->lst_fb_idx], &cm->yv12_fb[cm->new_fb_idx]);
+ cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
}
else
- cm->frame_to_show = &cm->new_frame;
+ cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
@@ -4351,43 +4243,48 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
}
}
-
- // At this point the new frame has been encoded coded.
- // If any buffer copy / swaping is signalled it should be done here.
- if (cm->frame_type == KEY_FRAME)
{
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
- }
- else // For non key frames
- {
- // Code to copy between reference buffers
- if (cm->copy_buffer_to_arf)
+ YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+ YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+ YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+ YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
+ // At this point the new frame has been encoded.
+ // If any buffer copy / swapping is signalled it should be done here.
+ if (cm->frame_type == KEY_FRAME)
+ {
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
+ }
+ else // For non key frames
{
- if (cm->copy_buffer_to_arf == 1)
+ // Code to copy between reference buffers
+ if (cm->copy_buffer_to_arf)
{
- if (cm->refresh_last_frame)
- // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
+ if (cm->copy_buffer_to_arf == 1)
+ {
+ if (cm->refresh_last_frame)
+ // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+ vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
+ else
+ vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
+ }
+ else if (cm->copy_buffer_to_arf == 2)
+ vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
}
- else if (cm->copy_buffer_to_arf == 2)
- vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
- }
- if (cm->copy_buffer_to_gf)
- {
- if (cm->copy_buffer_to_gf == 1)
+ if (cm->copy_buffer_to_gf)
{
- if (cm->refresh_last_frame)
- // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
- else
- vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+ if (cm->copy_buffer_to_gf == 1)
+ {
+ if (cm->refresh_last_frame)
+ // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+ vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
+ else
+ vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
+ }
+ else if (cm->copy_buffer_to_gf == 2)
+ vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
}
- else if (cm->copy_buffer_to_gf == 2)
- vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
}
}
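The copy semantics are unchanged, only routed through the frame pool: a value of 1 in copy_buffer_to_arf takes the last frame (or the new frame when last/new were just swapped by refresh_last_frame), 2 takes the golden frame, and copy_buffer_to_gf mirrors this with the alt-ref as its 2 source. A sketch of the ARF half (hypothetical helper mirroring the logic above):

static YV12_BUFFER_CONFIG *arf_copy_source(VP8_COMMON *cm)
{
    if (cm->copy_buffer_to_arf == 1)
        /* last/new already swapped when refresh_last_frame is set */
        return cm->refresh_last_frame ? &cm->yv12_fb[cm->new_fb_idx]
                                      : &cm->yv12_fb[cm->lst_fb_idx];

    if (cm->copy_buffer_to_arf == 2)
        return &cm->yv12_fb[cm->gld_fb_idx];

    return 0;   /* no copy requested */
}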
@@ -4525,18 +4422,46 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
}
}
-#if CONFIG_PSNR
-
- if (0)
+#if 0 && CONFIG_PSNR
{
FILE *f = fopen("tmp.stt", "a");
vp8_clear_system_state(); //__asm emms;
if (cpi->total_coded_error_left != 0.0)
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left, (double)cpi->bits_left / cpi->total_coded_error_left, cpi->tot_recode_hits);
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
+ "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+ "%10.3f %8ld\n",
+ cpi->common.current_video_frame, cpi->this_frame_target,
+ cpi->projected_frame_size,
+ (cpi->projected_frame_size - cpi->this_frame_target),
+ (int)cpi->total_target_vs_actual,
+ (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
+ (int)cpi->total_actual_bits, cm->base_qindex,
+ cpi->active_best_quality, cpi->active_worst_quality,
+ cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cm->frame_type, cpi->gfu_boost,
+ cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
+ cpi->total_coded_error_left,
+ (double)cpi->bits_left / cpi->total_coded_error_left,
+ cpi->tot_recode_hits);
else
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left, cpi->tot_recode_hits);
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
+ "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+ "%8ld\n",
+ cpi->common.current_video_frame,
+ cpi->this_frame_target, cpi->projected_frame_size,
+ (cpi->projected_frame_size - cpi->this_frame_target),
+ (int)cpi->total_target_vs_actual,
+ (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
+ (int)cpi->total_actual_bits, cm->base_qindex,
+ cpi->active_best_quality, cpi->active_worst_quality,
+ cpi->avg_frame_qindex, cpi->zbin_over_quant,
+ cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cm->frame_type, cpi->gfu_boost,
+ cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
+ cpi->total_coded_error_left, cpi->tot_recode_hits);
fclose(f);
@@ -4544,7 +4469,10 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
FILE *fmodes = fopen("Modes.stt", "a");
int i;
- fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame, cm->frame_type, cm->refresh_golden_frame, cm->refresh_alt_ref_frame);
+ fprintf(fmodes, "%6d:%1d:%1d:%1d ",
+ cpi->common.current_video_frame,
+ cm->frame_type, cm->refresh_golden_frame,
+ cm->refresh_alt_ref_frame);
for (i = 0; i < MAX_MODES; i++)
fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
@@ -4590,23 +4518,23 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
if (cpi->gold_is_last)
- cpi->ref_frame_flags &= !VP8_GOLD_FLAG;
+ cpi->ref_frame_flags &= ~VP8_GOLD_FLAG;
if (cpi->alt_is_last)
- cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+ cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
if (cpi->gold_is_alt)
- cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+ cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
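These three lines fix an operator bug: the reference flags are single bits, and logical NOT of any nonzero value is 0, so `ref_frame_flags &= !VP8_GOLD_FLAG` cleared every flag instead of just the golden one. Bitwise complement clears only the intended bit (flag values assumed to be the usual single-bit constants):

#include <assert.h>

enum { LAST_F = 1, GOLD_F = 2, ALT_F = 4 };

static void not_vs_complement(void)
{
    int flags = ALT_F | GOLD_F | LAST_F;

    assert((flags & !GOLD_F) == 0);                /* !2 == 0: wipes all */
    assert((flags & ~GOLD_F) == (ALT_F | LAST_F)); /* clears GOLD_F only */
}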
if (cpi->oxcf.error_resilient_mode)
{
// Is this an alternate reference update
if (cpi->common.refresh_alt_ref_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
if (cpi->common.refresh_golden_frame)
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
}
else
{
@@ -4652,15 +4580,17 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
- if (0)
+#if 0
{
char filename[512];
FILE *recon_file;
sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
recon_file = fopen(filename, "wb");
- fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+ fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
+ cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
fclose(recon_file);
}
+#endif
// DEBUG
//vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show);
@@ -4682,7 +4612,7 @@ int vp8_is_gf_update_needed(VP8_PTR ptr)
void vp8_check_gf_quality(VP8_COMP *cpi)
{
VP8_COMMON *cm = &cpi->common;
- int gf_active_pct = (100 * cm->gf_active_count) / (cm->mb_rows * cm->mb_cols);
+ int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols);
int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols);
@@ -4720,8 +4650,6 @@ void vp8_check_gf_quality(VP8_COMP *cpi)
}
#if 0
-
- if (0)
{
FILE *f = fopen("gfneeded.stt", "a");
fprintf(f, "%10d %10d %10d %10d %10ld \n",
@@ -4758,10 +4686,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
#endif
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
{
+ INT64 store_reg[8];
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
@@ -4770,7 +4698,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
return -1;
#if HAVE_ARMV7
- vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(store_reg);
+ }
#endif
vpx_usec_timer_start(&timer);
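store_reg also moves from a file-scope static into each entry point, making the NEON callee-saved register save/restore reentrant: nested or concurrent encoder instances no longer share one save area. The fixed pattern, condensed (runtime-detect guards omitted; INT64 is the tree's 64-bit typedef):

#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
#endif

int entry_point_sketch(void)
{
#if HAVE_ARMV7
    INT64 store_reg[8];          /* per-invocation NEON save area */
    vp8_push_neon(store_reg);
#endif
    /* ... work that may clobber callee-saved NEON registers ... */
#if HAVE_ARMV7
    vp8_pop_neon(store_reg);
#endif
    return 0;
}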
@@ -4779,7 +4712,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
{
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return -1;
}
@@ -4820,9 +4758,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
s->source_time_stamp = time_stamp;
s->source_frame_flags = frame_flags;
#if HAVE_ARMV7
- vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
- vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+ }
#endif
cpi->source_buffer_count = 1;
}
@@ -4831,14 +4780,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
}
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
{
-
+ INT64 store_reg[8];
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer tsctimer;
@@ -4849,7 +4803,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
return -1;
#if HAVE_ARMV7
- vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_push_neon(store_reg);
+ }
#endif
vpx_usec_timer_start(&cmptimer);
@@ -4950,6 +4909,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
cm->show_frame = 0;
cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag.
cpi->is_src_frame_alt_ref = 0;
+ cpi->is_next_src_alt_ref = 0;
}
else
#endif
@@ -4961,26 +4921,18 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
{
if (cpi->source_encode_index == cpi->last_alt_ref_sei)
{
-#if VP8_TEMPORAL_ALT_REF
-
- if (cpi->oxcf.arnr_max_frames == 0)
- {
- cpi->is_src_frame_alt_ref = 1; // copy alt ref
- }
- else
- {
- cpi->is_src_frame_alt_ref = 0;
- }
-
-#else
cpi->is_src_frame_alt_ref = 1;
-#endif
cpi->last_alt_ref_sei = -1;
}
else
cpi->is_src_frame_alt_ref = 0;
cpi->source_encode_index = (cpi->source_encode_index + 1) % cpi->oxcf.lag_in_frames;
+
+ if(cpi->source_encode_index == cpi->last_alt_ref_sei)
+ cpi->is_next_src_alt_ref = 1;
+ else
+ cpi->is_next_src_alt_ref = 0;
}
#endif
@@ -5008,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
#endif
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return -1;
}
@@ -5051,7 +5008,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (!cpi)
{
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
}
@@ -5142,8 +5104,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
{
// return to normal state
- cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
-
cm->refresh_entropy_probs = 1;
cm->refresh_alt_ref_frame = 0;
cm->refresh_golden_frame = 0;
@@ -5242,7 +5202,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
#endif
#if HAVE_ARMV7
- vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_pop_neon(store_reg);
+ }
#endif
return 0;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 55076b091..81e32f031 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -28,7 +29,6 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "mcomp.h"
-#define INTRARDOPT
//#define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
#define DEFAULT_GF_INTERVAL 7
@@ -229,20 +229,33 @@ typedef struct VP8_ENCODER_RTCD
vp8_search_rtcd_vtable_t search;
} VP8_ENCODER_RTCD;
+enum
+{
+ BLOCK_16X8,
+ BLOCK_8X16,
+ BLOCK_8X8,
+ BLOCK_4X4,
+ BLOCK_16X16,
+ BLOCK_MAX_SEGMENTS
+};
+
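The enum indexes the new per-block-size array of match-function bundles (fn_ptr[BLOCK_MAX_SEGMENTS], further down), so each motion search receives one vp8_variance_fn_ptr_t -- SAD, variance, sub-pixel variance, multi-SAD -- for its partition size instead of loose svf/vf pointers. Typical call-site shape (a sketch using the tree's types):

static int whole_pel_err_sketch(VP8_COMP *cpi,
                                const unsigned char *src, int src_stride,
                                const unsigned char *ref, int ref_stride)
{
    const vp8_variance_fn_ptr_t *vfp = &cpi->fn_ptr[BLOCK_16X16];
    unsigned int sse;

    return vfp->vf(src, src_stride, ref, ref_stride, &sse);
}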
typedef struct
{
- DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
+ DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
+ DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
- DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
+ DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
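Each per-Q table drops the [4][4] nesting for a flat [16] run, keeping all sixteen coefficients of a Q level in one aligned, contiguous 32-byte span for SIMD loads, and each Y1/Y2/UV group gains a companion *quant_shift table for the updated quantizer. The index mapping, for reference:

/* Sketch: old nested layout vs new flat layout. */
short old_style[QINDEX_RANGE][4][4];
short new_style[QINDEX_RANGE][16];

/* old_style[q][r][c] corresponds to new_style[q][r * 4 + c] */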
@@ -270,6 +283,7 @@ typedef struct
int last_alt_ref_sei;
int is_src_frame_alt_ref;
+ int is_next_src_alt_ref;
int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
@@ -306,15 +320,12 @@ typedef struct
int subseqblockweight;
int errthresh;
-#ifdef INTRARDOPT
int RDMULT;
int RDDIV ;
TOKENEXTRA *rdtok;
- int intra_rd_opt;
vp8_writer rdbc;
int intra_mode_costs[10];
-#endif
CODING_CONTEXT coding_context;
@@ -355,9 +366,14 @@ typedef struct
int gf_bits; // Bits for the golden frame or ARF - 2 pass only
int mid_gf_extra_bits; // A few extra bits for the frame half way between two gfs.
- int kf_group_bits; // Projected total bits available for a key frame group of frames
- int kf_group_error_left; // Error score of frames still to be coded in kf group
- int kf_bits; // Bits for the key frame in a key frame group - 2 pass only
+ // Projected total bits available for a key frame group of frames
+ long long kf_group_bits;
+
+ // Error score of frames still to be coded in kf group
+ long long kf_group_error_left;
+
+ // Bits for the key frame in a key frame group - 2 pass only
+ int kf_bits;
int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF
int initial_gf_use; // percentage use of gf 2 frames after gf
@@ -369,6 +385,7 @@ typedef struct
int max_gf_interval;
int baseline_gf_interval;
int gf_decay_rate;
+ int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
INT64 key_frame_count;
INT64 tot_key_frame_bits;
@@ -454,14 +471,14 @@ typedef struct
int target_bandwidth;
long long bits_left;
- FIRSTPASS_STATS total_stats;
- FIRSTPASS_STATS this_frame_stats;
+ FIRSTPASS_STATS *total_stats;
+ FIRSTPASS_STATS *this_frame_stats;
FIRSTPASS_STATS *stats_in, *stats_in_end;
struct vpx_codec_pkt_list *output_pkt_list;
int first_pass_done;
unsigned char *fp_motion_map;
- FILE *fp_motion_mapfile;
- int fpmm_pos;
+
+ unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
#if 0
// Experimental code for lagged and one pass
@@ -522,8 +539,8 @@ typedef struct
int motion_lvl;
int motion_speed;
int motion_var;
- int next_iiratio;
- int this_iiratio;
+ unsigned int next_iiratio;
+ unsigned int this_iiratio;
int this_frame_modified_error;
double norm_intra_err_per_mb;
@@ -584,7 +601,7 @@ typedef struct
fractional_mv_step_fp *find_fractional_mv_step;
vp8_full_search_fn_t full_search_sad;
vp8_diamond_search_fn_t diamond_search_sad;
- vp8_variance_fn_ptr_t fn_ptr;
+ vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
unsigned int time_receive_data;
unsigned int time_compress_data;
unsigned int time_pick_lpf;
@@ -607,9 +624,11 @@ typedef struct
#endif
#if VP8_TEMPORAL_ALT_REF
SOURCE_SAMPLE alt_ref_buffer;
- unsigned char *frames[MAX_LAG_BUFFERS];
- int fixed_divide[255];
+ YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
+ int fixed_divide[512];
#endif
+ // Flag to indicate temporal filter method
+ int use_weighted_temporal_filter;
#if CONFIG_PSNR
int count;
@@ -637,6 +656,12 @@ typedef struct
int b_calculate_ssimg;
#endif
int b_calculate_psnr;
+
+
+ unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
+ int gf_active_count;
+
+
} VP8_COMP;
void control_data_rate(VP8_COMP *cpi);
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index 66fdafb1a..6cc450121 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index d61e2ceda..2f7dd9c7c 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -49,14 +50,13 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
{
(void) b;
(void) d;
(void) ref_mv;
(void) error_per_bit;
- (void) svf;
- (void) vf;
+ (void) vfp;
(void) mvcost;
bestmv->row <<= 3;
bestmv->col <<= 3;
@@ -64,7 +64,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
}
-static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse)
+static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse)
{
BLOCK *b = &mb->block[0];
@@ -80,20 +80,20 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v
if (xoffset | yoffset)
{
- return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
+ return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
}
else
{
- return vf(what, what_stride, in_what, in_what_stride, sse);
+ return vfp->vf(what, what_stride, in_what, in_what_stride, sse);
}
}
unsigned int vp8_get16x16pred_error_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad
)
@@ -124,9 +124,9 @@ unsigned int vp8_get16x16pred_error_c
unsigned int vp8_get4x4sse_cs_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
int max_sad
)
@@ -219,13 +219,20 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
- TEMP_CONTEXT t;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
int distortion = 0;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
+
+ vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
vp8_intra_prediction_down_copy(xd);
- vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
for (i = 0; i < 16; i++)
{
@@ -238,8 +245,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
error += pick_intra4x4block(rtcd,
mb, mb->block + i, xd->block + i, &best_mode, A, L,
- t.a + vp8_block2above[i],
- t.l + vp8_block2left[i], &r, &d);
+ ta + vp8_block2above[i],
+ tl + vp8_block2left[i], &r, &d);
cost += r;
distortion += d;
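The removed TEMP_CONTEXT helper is replaced by stack snapshots of the above/left entropy context planes, so rating each candidate 4x4 mode can update token contexts without disturbing the real per-frame state. The snapshot idiom, condensed from the lines above:

/* Sketch: run mode trials against throwaway context copies. */
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta, *tl;

vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(t_above));
vpx_memcpy(&t_left,  mb->e_mbd.left_context,  sizeof(t_left));

ta = (ENTROPY_CONTEXT *)&t_above;   /* indexed per block via */
tl = (ENTROPY_CONTEXT *)&t_left;    /* vp8_block2above / vp8_block2left */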
@@ -409,7 +416,7 @@ int vp8_pick_intra_mbuv_mode(MACROBLOCK *mb)
}
- mb->e_mbd.mbmi.uv_mode = best_mode;
+ mb->e_mbd.mode_info_context->mbmi.uv_mode = best_mode;
return best_error;
}
@@ -422,6 +429,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
MACROBLOCKD *xd = &x->e_mbd;
B_MODE_INFO best_bmodes[16];
MB_MODE_INFO best_mbmode;
+ PARTITION_INFO best_partition;
MV best_ref_mv1;
MV mode_mv[MB_MODE_COUNT];
MB_PREDICTION_MODE this_mode;
@@ -453,41 +461,48 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
vpx_memset(mode_mv, 0, sizeof(mode_mv));
vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
vpx_memset(near_mv, 0, sizeof(near_mv));
+ vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
// set up all the refframe dependent pointers.
if (cpi->ref_frame_flags & VP8_LAST_FLAG)
{
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME],
&best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
- y_buffer[LAST_FRAME] = cpi->common.last_frame.y_buffer + recon_yoffset;
- u_buffer[LAST_FRAME] = cpi->common.last_frame.u_buffer + recon_uvoffset;
- v_buffer[LAST_FRAME] = cpi->common.last_frame.v_buffer + recon_uvoffset;
+ y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
+ u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
+ v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
}
else
skip_mode[LAST_FRAME] = 1;
if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
{
+ YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
+
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME],
&best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
- y_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.y_buffer + recon_yoffset;
- u_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.u_buffer + recon_uvoffset;
- v_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+ y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
+ u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
+ v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
}
else
skip_mode[GOLDEN_FRAME] = 1;
if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active)
{
+ YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME],
&best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
- y_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
- u_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
- v_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+ y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
+ u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
+ v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
}
else
skip_mode[ALTREF_FRAME] = 1;
@@ -527,7 +542,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
best_rd = INT_MAX;
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
// if we encode a new mv this is important
// find the best new motion vector
@@ -539,9 +554,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
if (best_rd <= cpi->rd_threshes[mode_index])
continue;
- x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+ x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
- if (skip_mode[x->e_mbd.mbmi.ref_frame])
+ if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame])
continue;
// Check to see if the testing frequency for this mode is at its max
@@ -570,33 +585,33 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
distortion2 = 0;
this_mode = vp8_mode_order[mode_index];
-
+
// Experimental debug code.
//all_rds[mode_index] = -1;
- x->e_mbd.mbmi.mode = this_mode;
- x->e_mbd.mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
// Work out the cost associated with selecting the reference frame
- frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+ frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
rate2 += frame_cost;
// everything but intra
- if (x->e_mbd.mbmi.ref_frame)
+ if (x->e_mbd.mode_info_context->mbmi.ref_frame)
{
- x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mbmi.ref_frame];
- x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mbmi.ref_frame];
- x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mbmi.ref_frame];
- mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mbmi.ref_frame];
- mode_mv[NEARMV] = near_mv[x->e_mbd.mbmi.ref_frame];
- best_ref_mv1 = best_ref_mv[x->e_mbd.mbmi.ref_frame];
- memcpy(mdcounts, MDCounts[x->e_mbd.mbmi.ref_frame], sizeof(mdcounts));
+ x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
}
//Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
if (cpi->is_src_frame_alt_ref)
{
- if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+ if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
continue;
}
@@ -636,7 +651,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
case TM_PRED:
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
- rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+ rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
@@ -703,13 +718,13 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
if (cpi->sf.search_method == HEX)
{
- bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
}
else
{
- bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+ bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -728,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
num00--;
else
{
- thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+ thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
if (thissme < bestsme)
{
@@ -749,7 +764,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
}
if (bestsme < INT_MAX)
- cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost);
+ cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -774,12 +789,12 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
continue;
rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
- x->e_mbd.mbmi.mode = this_mode;
- x->e_mbd.mbmi.mv.as_mv = mode_mv[this_mode];
+ x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+ x->e_mbd.mode_info_context->mbmi.mv.as_mv = mode_mv[this_mode];
x->e_mbd.block[0].bmi.mode = this_mode;
- x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mbmi.mv.as_int;
+ x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
- distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
+ distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
@@ -816,7 +831,8 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
*returnrate = rate2;
*returndistortion = distortion2;
best_rd = this_rd;
- vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
if (this_mode == B_PRED || this_mode == SPLITMV)
for (i = 0; i < 16; i++)
@@ -862,9 +878,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
if (best_mbmode.mode <= B_PRED)
{
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_pick_intra_mbuv_mode(x);
- best_mbmode.uv_mode = x->e_mbd.mbmi.uv_mode;
+ best_mbmode.uv_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
}
@@ -890,23 +906,25 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
best_mbmode.partitioning = 0;
best_mbmode.dc_diff = 0;
- vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
for (i = 0; i < 16; i++)
{
vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
}
- x->e_mbd.mbmi.mv.as_int = 0;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
return best_rd;
}
// macroblock modes
- vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
- if (x->e_mbd.mbmi.mode == B_PRED || x->e_mbd.mbmi.mode == SPLITMV)
+ if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
for (i = 0; i < 16; i++)
{
vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
@@ -914,10 +932,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
}
else
{
- vp8_set_mbmode_and_mvs(x, x->e_mbd.mbmi.mode, &best_bmodes[0].mv.as_mv);
+ vp8_set_mbmode_and_mvs(x, x->e_mbd.mode_info_context->mbmi.mode, &best_bmodes[0].mv.as_mv);
}
- x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+ x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
return best_rd;
}
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index fb28837ed..b80e4c86f 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index bbd7840b8..09e8b5412 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -15,6 +16,9 @@
#include "vpx_scale/yv12extend.h"
#include "vpx_scale/vpxscale.h"
#include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val, int sharpness_lvl);
@@ -305,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Make a copy of the unfiltered / processed recon buffer
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
- vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+ }
#endif
if (cm->frame_type == KEY_FRAME)
@@ -342,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
while (filter_step > 0)
@@ -371,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
// If value is close to the best so far then bias towards a lower loop filter value.
@@ -400,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Re-instate the unfiltered frame
#if HAVE_ARMV7
- vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
- vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+ if (cm->rtcd.flags & HAS_NEON)
+#endif
+ {
+ vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+ }
+#if CONFIG_RUNTIME_CPU_DETECT
+ else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+ {
+ vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+ }
#endif
// Was it better than the previous best?
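Note on the four copy sites patched above: each expands the same nested-preprocessor dispatch idiom. With CONFIG_RUNTIME_CPU_DETECT unset, only one of the two calls survives preprocessing; with it set, both are compiled and an rtcd flag picks one at run time. A minimal sketch of the idiom, where copy_frame_neon and copy_frame_c are hypothetical stand-ins for the vp8_yv12_* routines:

    /* Sketch only; copy_frame_neon/copy_frame_c are illustrative
     * stand-ins, not functions in this tree. */
    static void copy_frame(VP8_COMMON *cm,
                           YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst)
    {
    #if HAVE_ARMV7
    #if CONFIG_RUNTIME_CPU_DETECT
        if (cm->rtcd.flags & HAS_NEON)
    #endif
        {
            copy_frame_neon(src, dst);    /* NEON path */
        }
    #if CONFIG_RUNTIME_CPU_DETECT
        else
    #endif
    #endif
    #if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
        {
            copy_frame_c(src, dst);       /* generic C path */
        }
    #endif
    }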
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index f99277f99..588656b97 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
index e0e976d71..6e0099ddc 100644
--- a/vp8/encoder/ppc/encodemb_altivec.asm
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm
index eaab14c79..935d0cb09 100644
--- a/vp8/encoder/ppc/fdct_altivec.asm
+++ b/vp8/encoder/ppc/fdct_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm
index 917bfe036..ba4823009 100644
--- a/vp8/encoder/ppc/rdopt_altivec.asm
+++ b/vp8/encoder/ppc/rdopt_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm
index 1102ccf17..e5f26380f 100644
--- a/vp8/encoder/ppc/sad_altivec.asm
+++ b/vp8/encoder/ppc/sad_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index 952bf7286..a1ebf663a 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 148a8d25b..301360b1d 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c
index d2a13dced..bd918fa3c 100644
--- a/vp8/encoder/preproc.c
+++ b/vp8/encoder/preproc.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index 0e34cecb1..dc2a03b69 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index 9f6ca0bbf..8ae444823 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 6028ebf56..5e65fadb3 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -15,18 +16,21 @@
#include "entropy.h"
#include "predictdc.h"
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
int i, rc, eob;
int zbin;
int x, y, z, sz;
- short *coeff_ptr = &b->coeff[0];
- short *zbin_ptr = &b->zbin[0][0];
- short *round_ptr = &b->round[0][0];
- short *quant_ptr = &b->quant[0][0];
- short *qcoeff_ptr = d->qcoeff;
- short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = &d->dequant[0][0];
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *quant_shift_ptr = b->quant_shift;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
vpx_memset(qcoeff_ptr, 0, 32);
vpx_memset(dqcoeff_ptr, 0, 32);
@@ -44,7 +48,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
if (x >= zbin)
{
- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+ x += round_ptr[rc];
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
qcoeff_ptr[rc] = x; // write to destination
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
@@ -55,9 +61,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
}
}
}
-
d->eob = eob + 1;
-
}
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
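The quantizer change in the hunk above replaces the single multiply y = ((x + round) * quant) >> 16 with a two-step form in which quant carries only 16 fractional bits of the reciprocal of the step size and a per-coefficient quant_shift finishes the division. Assuming the tables are built so that quant is roughly 2^(16+shift)/Q - 2^16 (the table construction is not part of this diff), one coefficient sketches as:

    /* Hedged sketch of the EXACT_QUANT math for one coefficient; names
     * mirror the hunk but this is not a drop-in replacement. */
    static short quantize_one(int z, short round, short quant, short shift)
    {
        int sz = z >> 31;           /* 0 for z >= 0, -1 for z < 0 */
        int x  = (z ^ sz) - sz;     /* abs(z) */
        int y;

        x += round;
        /* With quant ~= 2^(16+shift)/Q - 2^16:
         * ((x * quant) >> 16) + x ~= (x << shift) / Q,
         * so the final shift completes the division by Q. */
        y = (((x * quant) >> 16) + x) >> shift;

        return (short)((y ^ sz) - sz);   /* put the sign back */
    }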
@@ -65,15 +69,16 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
int i, rc, eob;
int zbin;
int x, y, z, sz;
- short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
- short *coeff_ptr = &b->coeff[0];
- short *zbin_ptr = &b->zbin[0][0];
- short *round_ptr = &b->round[0][0];
- short *quant_ptr = &b->quant[0][0];
- short *qcoeff_ptr = d->qcoeff;
- short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = &d->dequant[0][0];
- short zbin_oq_value = b->zbin_extra;
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *quant_shift_ptr = b->quant_shift;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+ short zbin_oq_value = b->zbin_extra;
vpx_memset(qcoeff_ptr, 0, 32);
vpx_memset(dqcoeff_ptr, 0, 32);
@@ -96,7 +101,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
if (x >= zbin)
{
- y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+ x += round_ptr[rc];
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
qcoeff_ptr[rc] = x; // write to destination
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
@@ -111,139 +118,182 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
d->eob = eob + 1;
}
-void vp8_quantize_mby(MACROBLOCK *x)
+
+/* Perform regular quantization, with unbiased rounding and no zero bin. */
+void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
{
int i;
-
- if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+ int rc;
+ int eob;
+ int x;
+ int y;
+ int z;
+ int sz;
+ short *coeff_ptr;
+ short *quant_ptr;
+ short *quant_shift_ptr;
+ short *qcoeff_ptr;
+ short *dqcoeff_ptr;
+ short *dequant_ptr;
+
+ coeff_ptr = b->coeff;
+ quant_ptr = b->quant;
+ quant_shift_ptr = b->quant_shift;
+ qcoeff_ptr = d->qcoeff;
+ dqcoeff_ptr = d->dqcoeff;
+ dequant_ptr = d->dequant;
+ eob = - 1;
+ vpx_memset(qcoeff_ptr, 0, 32);
+ vpx_memset(dqcoeff_ptr, 0, 32);
+ for (i = 0; i < 16; i++)
{
- for (i = 0; i < 16; i++)
+ int dq;
+ int round;
+
+ /*TODO: These arrays should be stored in zig-zag order.*/
+ rc = vp8_default_zig_zag1d[i];
+ z = coeff_ptr[rc];
+ dq = dequant_ptr[rc];
+ round = dq >> 1;
+ /* Sign of z. */
+ sz = -(z < 0);
+ x = (z + sz) ^ sz;
+ x += round;
+ if (x >= dq)
{
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
+ /* Quantize x. */
+ y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
+ /* Put the sign back. */
+ x = (y + sz) ^ sz;
+ /* Save the coefficient and its dequantized value. */
+ qcoeff_ptr[rc] = x;
+ dqcoeff_ptr[rc] = x * dq;
+ /* Remember the last non-zero coefficient. */
+ if (y)
+ eob = i;
}
+ }
- x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
+ d->eob = eob + 1;
+}
- }
- else
+#else
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ short *coeff_ptr = b->coeff;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+
+ eob = -1;
+ for (i = 0; i < 16; i++)
{
- for (i = 0; i < 16; i++)
+ rc = vp8_default_zig_zag1d[i];
+ z = coeff_ptr[rc];
+
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+
+ if (y)
{
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+ eob = i; // last nonzero coeffs
}
}
+ d->eob = eob + 1;
}
-void vp8_quantize_mb(MACROBLOCK *x)
+void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
{
- int i;
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+ short zbin_oq_value = b->zbin_extra;
- x->e_mbd.mbmi.mb_skip_coeff = 1;
+ vpx_memset(qcoeff_ptr, 0, 32);
+ vpx_memset(dqcoeff_ptr, 0, 32);
- if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
- {
- for (i = 0; i < 16; i++)
- {
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
- }
+ eob = -1;
- for (i = 16; i < 25; i++)
- {
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
- }
- else
+ for (i = 0; i < 16; i++)
{
- for (i = 0; i < 24; i++)
+ rc = vp8_default_zig_zag1d[i];
+ z = coeff_ptr[rc];
+
+ //if ( i == 0 )
+ // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
+ //else
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+ zbin_boost_ptr ++;
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin)
{
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+
+ if (y)
+ {
+ eob = i; // last nonzero coeffs
+ zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength
+ }
}
}
+ d->eob = eob + 1;
}
+#endif
-void vp8_quantize_mbuv(MACROBLOCK *x)
+void vp8_quantize_mby(MACROBLOCK *x)
{
int i;
+ int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
- for (i = 16; i < 24; i++)
- {
+ for (i = 0; i < 16; i++)
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
-}
-
-// This function is not currently called
-void vp8_quantize_mbrd(MACROBLOCK *x)
-{
- int i;
-
- x->e_mbd.mbmi.mb_skip_coeff = 1;
-
- if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
- {
- for (i = 0; i < 16; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
- }
- for (i = 16; i < 25; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
- }
- else
- {
- for (i = 0; i < 24; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
- }
+ if(has_2nd_order)
+ x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
-void vp8_quantize_mbuvrd(MACROBLOCK *x)
+void vp8_quantize_mb(MACROBLOCK *x)
{
int i;
+ int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
- for (i = 16; i < 24; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
+ for (i = 0; i < 24+has_2nd_order; i++)
+ x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
}
-void vp8_quantize_mbyrd(MACROBLOCK *x)
+
+void vp8_quantize_mbuv(MACROBLOCK *x)
{
int i;
- if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
- {
- for (i = 0; i < 16; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
- }
-
- x->quantize_brd(&x->block[24], &x->e_mbd.block[24]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-
- }
- else
- {
- for (i = 0; i < 16; i++)
- {
- x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
- x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
- }
- }
+ for (i = 16; i < 24; i++)
+ x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
}
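Two conventions in the rewritten quantize.c are easy to miss. First, d->eob stores eob + 1, i.e. the number of coefficients up to and including the last non-zero one, so an all-zero block yields 0. Second, the file now uses two equivalent branch-free sign idioms: xor-then-subtract in the fast/regular paths and add-then-xor in vp8_strict_quantize_b. (The mby/mb/mbuv helpers also drop their mb_skip_coeff bookkeeping; the B_PRED/SPLITMV special cases collapse into a has_2nd_order flag, and the bound 24 + has_2nd_order includes the Y2 block only when it exists.) A self-checking sketch of the sign-idiom equivalence, valid for any z above INT_MIN:

    #include <assert.h>

    /* Sketch: both abs/sign idioms in quantize.c agree, and each restore
     * step inverts its own abs step. */
    static void sign_idioms(int z)
    {
        int sz1 = z >> 31;          /* arithmetic shift: 0 or -1 */
        int sz2 = -(z < 0);         /* comparison form:  0 or -1 */
        int a1  = (z ^ sz1) - sz1;  /* abs(z), xor-then-subtract */
        int a2  = (z + sz2) ^ sz2;  /* abs(z), add-then-xor */

        assert(sz1 == sz2 && a1 == a2);
        assert(((a1 ^ sz1) - sz1) == z);   /* fast/regular restore */
        assert(((a2 + sz2) ^ sz2) == z);   /* strict restore */
    }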
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 868e8e3a8..b74718bfa 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -16,6 +17,10 @@
#define prototype_quantize_block(sym) \
void (sym)(BLOCK *b,BLOCKD *d)
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/quantize_x86.h"
+#endif
+
#if ARCH_ARM
#include "arm/quantize_arm.h"
#endif
@@ -42,11 +47,10 @@ typedef struct
#define QUANTIZE_INVOKE(ctx,fn) vp8_quantize_##fn
#endif
+extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d);
+
extern void vp8_quantize_mb(MACROBLOCK *x);
extern void vp8_quantize_mbuv(MACROBLOCK *x);
extern void vp8_quantize_mby(MACROBLOCK *x);
-extern void vp8_quantize_mbyrd(MACROBLOCK *x);
-extern void vp8_quantize_mbuvrd(MACROBLOCK *x);
-extern void vp8_quantize_mbrd(MACROBLOCK *x);
#endif
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 23a2d1abd..dd324f435 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -407,7 +408,7 @@ static void calc_gf_params(VP8_COMP *cpi)
cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
cpi->recent_ref_frame_usage[ALTREF_FRAME];
- int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+ int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
// Reset the last boost indicator
//cpi->last_boost = 100;
@@ -1021,7 +1022,7 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
cpi->recent_ref_frame_usage[ALTREF_FRAME];
- int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+ int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
// Reset the last boost indicator
//cpi->last_boost = 100;
@@ -1119,10 +1120,12 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
}
// If there is an active ARF at this location use the minimum
- // bits on this frame unless it was a contructed arf.
- else if (cpi->oxcf.arnr_max_frames == 0)
+ // bits on this frame even if it is a constructed arf.
+ // The active maximum quantizer ensures that an appropriate
+ // number of bits will be spent if needed for constructed ARFs.
+ else
{
- cpi->this_frame_target = 0; // Minimial spend on gf that is replacing an arf
+ cpi->this_frame_target = 0;
}
cpi->current_gf_interval = cpi->frames_till_gf_update_due;
@@ -1363,8 +1366,7 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
if (cpi->zbin_over_quant > zbin_oqmax)
cpi->zbin_over_quant = zbin_oqmax;
- // Each over-run step is assumed to equate to approximately
- // 3% reduction in bitrate
+ // Adjust bits_per_mb_at_this_q estimate
bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
Factor += factor_adjustment;
@@ -1442,6 +1444,9 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
}
else
{
+ int last_kf_interval =
+ (cpi->frames_since_key > 0) ? cpi->frames_since_key : 1;
+
// reset keyframe context and calculate weighted average of last KEY_FRAME_CONTEXT keyframes
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
@@ -1452,8 +1457,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
}
else
{
- cpi->prior_key_frame_size[KEY_FRAME_CONTEXT - 1] = cpi->projected_frame_size;
- cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = cpi->frames_since_key;
+ cpi->prior_key_frame_size[i] = cpi->projected_frame_size;
+ cpi->prior_key_frame_distance[i] = last_kf_interval;
}
av_key_frame_bits += prior_key_frame_weight[i] * cpi->prior_key_frame_size[i];
@@ -1476,6 +1481,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
// allocated than those following other gfs.
cpi->kf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 7 / 8;
cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 1 / 8;
+ if(!av_key_frame_frequency)
+ av_key_frame_frequency = 60;
// Work out how much to try and recover per frame.
// For one pass we estimate the number of frames to spread it over based upon past history.
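The ratectrl.c hunks above bundle three fixes: pct_gf_active now reads gf_active_count from cpi rather than cpi->common; the keyframe-history update writes slot i instead of always overwriting slot KEY_FRAME_CONTEXT - 1, recording an interval clamped to at least one frame (last_kf_interval); and av_key_frame_frequency gets a nominal 60-frame fallback, presumably because it serves as a divisor when the overspend is spread over the following frames. A sketch of that last guard (recover_bits_per_frame is illustrative, not a function in this file):

    /* Sketch of the divide-by-zero guard added above. */
    static int recover_bits_per_frame(int kf_overspend_bits,
                                      int av_key_frame_frequency)
    {
        if (!av_key_frame_frequency)      /* no keyframe history yet */
            av_key_frame_frequency = 60;  /* assume a nominal interval */

        return kf_overspend_bits / av_key_frame_frequency;
    }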
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 588c7a823..766dfdfce 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 601c52978..8a753fd44 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -63,11 +64,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
-
const int vp8_auto_speed_thresh[17] =
{
@@ -170,15 +166,13 @@ static void fill_token_costs(
}
-static int rd_iifactor [ 32 ] = { 16, 16, 16, 12, 8, 4, 2, 0,
+static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
-
-
// The values in this table should be reviewed
static int sad_per_bit16lut[128] =
{
@@ -232,43 +226,41 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
int i;
int *thresh;
int threshmult;
-
- int capped_q = (Qvalue < 160) ? Qvalue : 160;
+ double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
+ double rdconst = 3.00;
vp8_clear_system_state(); //__asm emms;
- cpi->RDMULT = (int)((0.00007 * (capped_q * capped_q * capped_q * capped_q)) - (0.0125 * (capped_q * capped_q * capped_q)) +
- (2.25 * (capped_q * capped_q)) - (12.5 * capped_q) + 25.0);
-
- if (cpi->RDMULT < 25)
- cpi->RDMULT = 25;
-
- if (cpi->pass == 2)
- {
- if (cpi->common.frame_type == KEY_FRAME)
- cpi->RDMULT += (cpi->RDMULT * rd_iifactor[0]) / 16;
- else if (cpi->next_iiratio > 31)
- cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) / 16;
- else
- cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) / 16;
- }
-
+ // Further tests are required to see if the optimum is different
+ // for key frames, golden frames and arf frames.
+ // if (cpi->common.refresh_golden_frame ||
+ // cpi->common.refresh_alt_ref_frame)
+ cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
// Extend rate multiplier along side quantizer zbin increases
if (cpi->zbin_over_quant > 0)
{
- // Extend rate multiplier along side quantizer zbin increases
- if (cpi->zbin_over_quant > 0)
- {
- double oq_factor = pow(1.006, cpi->zbin_over_quant);
-
- if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0)))
- oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0);
+ double oq_factor;
+ double modq;
+
+ // Experimental code using the same basic equation as used for Q above
+ // The units of cpi->zbin_over_quant are 1/128 of Q bin size
+ oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
+ modq = (int)((double)capped_q * oq_factor);
+ cpi->RDMULT = (int)(rdconst * (modq * modq));
+ }
- cpi->RDMULT = (int)(oq_factor * cpi->RDMULT);
- }
+ if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
+ {
+ if (cpi->next_iiratio > 31)
+ cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
+ else
+ cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
}
+ if (cpi->RDMULT < 125)
+ cpi->RDMULT = 125;
+
cpi->mb.errorperbit = (cpi->RDMULT / 100);
if (cpi->mb.errorperbit < 1)
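The rewrite above drops the old quartic-in-Q polynomial for the RD multiplier in favor of a plain quadratic, RDMULT = 3.00 * Q^2, folds zbin_over_quant in by inflating Q itself (0.0015625 = 1/640, consistent with the stated 1/128-of-a-Q-bin units), restricts the two-pass ii-ratio boost to non-key frames (omitted from the sketch), and raises the floor from 25 to 125. Collected into one sketch that mirrors the hunk:

    /* Sketch of the new lambda computation, following the hunk above. */
    static int rdmult_sketch(int Qvalue, int zbin_over_quant)
    {
        double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
        double rdconst  = 3.00;
        int    rdmult   = (int)(rdconst * capped_q * capped_q);

        if (zbin_over_quant > 0)
        {
            /* zbin_over_quant is in units of 1/128 of a Q bin */
            double modq = (int)(capped_q *
                                (1.0 + 0.0015625 * zbin_over_quant));
            rdmult = (int)(rdconst * modq * modq);
        }

        if (rdmult < 125)
            rdmult = 125;

        return rdmult;
    }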
@@ -494,7 +486,7 @@ static int macro_block_max_error(MACROBLOCK *mb)
int i, j;
int berror;
- dc = !(mb->e_mbd.mbmi.mode == B_PRED || mb->e_mbd.mbmi.mode == SPLITMV);
+ dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV);
for (i = 0; i < 16; i++)
{
@@ -622,24 +614,28 @@ int vp8_rdcost_mby(MACROBLOCK *mb)
{
int cost = 0;
int b;
- TEMP_CONTEXT t, t2;
int type = 0;
-
MACROBLOCKD *x = &mb->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
- vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
- vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1);
+ vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- if (x->mbmi.mode == SPLITMV)
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+
+ if (x->mode_info_context->mbmi.mode == SPLITMV)
type = 3;
for (b = 0; b < 16; b++)
cost += cost_coeffs(mb, x->block + b, type,
- t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
- if (x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
cost += cost_coeffs(mb, x->block + 24, 1,
- t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]);
+ ta + vp8_block2above[24], tl + vp8_block2left[24]);
return cost;
}
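Throughout rdopt.c the TEMP_CONTEXT/vp8_setup_temp_context pairs give way to the snapshot idiom visible above: the above/left entropy context planes are copied onto the stack and the rate estimator mutates the copies, so trial costings never touch the macroblock's real contexts. In sketch form (snapshot_contexts is an illustrative wrapper, not a function in the tree):

    /* Sketch of the context-snapshot idiom used by the rd cost paths. */
    static void snapshot_contexts(MACROBLOCKD *xd,
                                  ENTROPY_CONTEXT_PLANES *t_above,
                                  ENTROPY_CONTEXT_PLANES *t_left,
                                  ENTROPY_CONTEXT **ta, ENTROPY_CONTEXT **tl)
    {
        vpx_memcpy(t_above, xd->above_context, sizeof(*t_above));
        vpx_memcpy(t_left,  xd->left_context,  sizeof(*t_left));
        *ta = (ENTROPY_CONTEXT *)t_above;
        *tl = (ENTROPY_CONTEXT *)t_left;
        /* cost_coeffs() then advances the copies through the
         * vp8_block2above[] / vp8_block2left[] maps; xd is left intact. */
    }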
@@ -718,13 +714,20 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
- TEMP_CONTEXT t;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int distortion = 0;
int tot_rate_y = 0;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
+
+ vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
vp8_intra_prediction_down_copy(xd);
- vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
for (i = 0; i < 16; i++)
{
@@ -737,8 +740,8 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
rd_pick_intra4x4block(
cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
- t.a + vp8_block2above[i],
- t.l + vp8_block2left[i], &r, &ry, &d);
+ ta + vp8_block2above[i],
+ tl + vp8_block2left[i], &r, &ry, &d);
cost += r;
distortion += d;
@@ -769,9 +772,9 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
int dummy;
rate = 0;
- x->e_mbd.mbmi.mode = mode;
+ x->e_mbd.mode_info_context->mbmi.mode = mode;
- rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+ rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
@@ -793,28 +796,33 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
}
}
- x->e_mbd.mbmi.mode = mode_selected;
+ x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
return best_rd;
}
static int rd_cost_mbuv(MACROBLOCK *mb)
{
- TEMP_CONTEXT t, t2;
int b;
int cost = 0;
MACROBLOCKD *x = &mb->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
- vp8_setup_temp_context(&t, x->above_context[UCONTEXT], x->left_context[UCONTEXT], 2);
- vp8_setup_temp_context(&t2, x->above_context[VCONTEXT], x->left_context[VCONTEXT], 2);
+ vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 20; b++)
cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
- t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
for (b = 20; b < 24; b++)
cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
- t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
return cost;
}
@@ -855,11 +863,11 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
int distortion;
int this_rd;
- x->e_mbd.mbmi.uv_mode = mode;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
rate_to = rd_cost_mbuv(x);
- rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.uv_mode];
+ rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
@@ -878,7 +886,7 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
*rate = r;
*distortion = d;
- x->e_mbd.mbmi.uv_mode = mode_selected;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
return best_rd;
}
#endif
@@ -888,16 +896,17 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
vp8_prob p [VP8_MVREFS-1];
assert(NEARESTMV <= m && m <= SPLITMV);
vp8_mv_ref_probs(p, near_mv_ref_ct);
- return vp8_cost_token(vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+ return vp8_cost_token(vp8_mv_ref_tree, p,
+ vp8_mv_ref_encoding_array - NEARESTMV + m);
}
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
{
int i;
- x->e_mbd.mbmi.mode = mb;
- x->e_mbd.mbmi.mv.as_mv.row = mv->row;
- x->e_mbd.mbmi.mv.as_mv.col = mv->col;
+ x->e_mbd.mode_info_context->mbmi.mode = mb;
+ x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
+ x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
for (i = 0; i < 16; i++)
{
@@ -909,21 +918,6 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
}
#if !(CONFIG_REALTIME_ONLY)
-int vp8_count_labels(int const *labelings)
-{
- int i;
- int count = 0;
-
- for (i = 0; i < 16; i++)
- {
- if (labelings[i] > count)
- count = labelings[i];
- }
-
- return count + 1;
-}
-
-
static int labels2mode(
MACROBLOCK *x,
int const *labelings, int which_label,
@@ -1002,18 +996,19 @@ static int labels2mode(
return cost;
}
-static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, TEMP_CONTEXT *t)
+static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
+ int which_label, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl)
{
int cost = 0;
int b;
MACROBLOCKD *x = &mb->e_mbd;
-
for (b = 0; b < 16; b++)
if (labels[ b] == which_label)
cost += cost_coeffs(mb, x->block + b, 3,
- t->a + vp8_block2above[b],
- t->l + vp8_block2left[b]);
+ ta + vp8_block2above[b],
+ tl + vp8_block2left[b]);
return cost;
@@ -1033,11 +1028,11 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
- x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
+ x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
// set to 0 no way to account for 2nd order DC so discount
//be->coeff[0] = 0;
- x->quantize_brd(be, bd);
+ x->quantize_b(be, bd);
distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
}
@@ -1061,13 +1056,13 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
// Fdct and building the 2nd order block
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
{
- mb->short_fdct8x4rd(beptr->src_diff, beptr->coeff, 32);
+ mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
*Y2DCPtr++ = beptr->coeff[0];
*Y2DCPtr++ = beptr->coeff[16];
}
// 2nd order fdct
- if (x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
}
@@ -1075,20 +1070,20 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
// Quantization
for (b = 0; b < 16; b++)
{
- mb->quantize_brd(&mb->block[b], &mb->e_mbd.block[b]);
+ mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
}
// DC prediction and Quantization of 2nd Order block
- if (x->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
{
- mb->quantize_brd(mb_y2, x_y2);
+ mb->quantize_b(mb_y2, x_y2);
}
}
// Distortion
- if (x->mbmi.mode == SPLITMV)
+ if (x->mode_info_context->mbmi.mode == SPLITMV)
d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2;
else
{
@@ -1102,15 +1097,19 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
*Rate = vp8_rdcost_mby(mb);
}
+unsigned char vp8_mbsplit_offset2[4][16] = {
+ { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+};
static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int compressor_speed, int *mvcost[2], int mvthresh, int fullpixel)
{
int i, segmentation;
B_PREDICTION_MODE this_mode;
MACROBLOCKD *xc = &x->e_mbd;
- BLOCK *b = &x->block[0];
- BLOCKD *d = &x->e_mbd.block[0];
- BLOCK *c = &x->block[0];
- BLOCKD *e = &x->e_mbd.block[0];
+ BLOCK *c;
+ BLOCKD *e;
int const *labels;
int best_segment_rd = INT_MAX;
int best_seg = 0;
@@ -1120,6 +1119,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int bsd = 0;
int bestsegmentyrate = 0;
+ static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};
+
// FIX TO Rd error outrange bug PGW 9 june 2004
B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
@@ -1130,6 +1131,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
MV bmvs[16];
int beobs[16];
+ vpx_memset(beobs, 0, sizeof(beobs));
+
+
for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++)
{
int label_count;
@@ -1138,56 +1142,33 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int rate = 0;
int sbr = 0;
int sbd = 0;
- int UNINITIALIZED_IS_SAFE(sseshift);
+ int sseshift;
int segmentyrate = 0;
- vp8_variance_fn_ptr_t v_fn_ptr;
+ vp8_variance_fn_ptr_t *v_fn_ptr;
+
+ ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
+ ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
+ ENTROPY_CONTEXT *ta_b;
+ ENTROPY_CONTEXT *tl_b;
+
+ vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- TEMP_CONTEXT t;
- TEMP_CONTEXT tb;
- vp8_setup_temp_context(&t, xc->above_context[Y1CONTEXT], xc->left_context[Y1CONTEXT], 4);
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ ta_b = (ENTROPY_CONTEXT *)&t_above_b;
+ tl_b = (ENTROPY_CONTEXT *)&t_left_b;
br = 0;
bd = 0;
- switch (segmentation)
- {
- case 0:
- v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
- v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
- v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
- v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
- v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
- sseshift = 3;
- break;
- case 1:
- v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
- v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
- v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
- v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
- v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
- sseshift = 3;
- break;
- case 2:
- v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
- v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
- v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
- v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
- v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
- sseshift = 2;
- break;
- case 3:
- v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
- v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
- v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
- v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
- v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
- sseshift = 0;
- break;
- }
-
+ v_fn_ptr = &cpi->fn_ptr[segmentation];
+ sseshift = segmentation_to_sseshift[segmentation];
labels = vp8_mbsplits[segmentation];
- label_count = vp8_count_labels(labels);
+ label_count = vp8_mbsplit_count[segmentation];
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
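In the hunk above, the per-segmentation switch that hand-filled a local vp8_variance_fn_ptr_t is replaced by indexing cpi->fn_ptr[] by split type, plus a small lookup table for the SSE shift. A sketch of the table-driven selection (the layout of cpi->fn_ptr is assumed from the hunk, not quoted from a header):

    /* Sketch: variance/SAD helpers selected by table lookup instead of a
     * switch; segmentation indexes 16x8, 8x16, 8x8, 4x4 in that order. */
    static const int segmentation_to_sseshift[4] = { 3, 3, 2, 0 };

    vp8_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[segmentation];
    int sseshift = segmentation_to_sseshift[segmentation];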
@@ -1211,14 +1192,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int j;
int bestlabelyrate = 0;
- b = &x->block[0];
- d = &x->e_mbd.block[0];
-
// find first label
- for (j = 0; j < 16; j++)
- if (labels[j] == i)
- break;
+ j = vp8_mbsplit_offset2[segmentation][i];
c = &x->block[j];
e = &x->e_mbd.block[j];
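vp8_mbsplit_offset2, defined earlier in this diff, precomputes for each split configuration the index of the first block carrying each label, turning the former 16-iteration scan into a table read. The scan it replaces, kept as a sketch to document what each table entry must equal:

    /* Sketch: vp8_mbsplit_offset2[segmentation][which_label] must equal
     * the result of the scan it replaced (labels points at
     * vp8_mbsplits[segmentation]). */
    static int first_block_for_label(const int *labels, int which_label)
    {
        int j;

        for (j = 0; j < 16; j++)
            if (labels[j] == which_label)
                break;

        return j;   /* 16 would mean the label is absent */
    }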
@@ -1230,9 +1206,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int this_rd;
int num00;
int labelyrate;
+ ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
+ ENTROPY_CONTEXT *ta_s;
+ ENTROPY_CONTEXT *tl_s;
+
+ vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
- TEMP_CONTEXT ts;
- vp8_setup_temp_context(&ts, &t.a[0], &t.l[0], 4);
+ ta_s = (ENTROPY_CONTEXT *)&t_above_s;
+ tl_s = (ENTROPY_CONTEXT *)&t_left_s;
if (this_mode == NEW4X4)
{
@@ -1251,10 +1233,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int sadpb = x->sadperbit4;
if (cpi->sf.search_method == HEX)
- bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost);
+ bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
else
{
- bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+ bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
n = num00;
num00 = 0;
@@ -1267,7 +1249,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
num00--;
else
{
- thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+ thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
if (thissme < bestsme)
{
@@ -1282,7 +1264,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
// Should we do a full search (best quality only)
if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
{
- thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost);
+ thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
if (thissme < bestsme)
{
@@ -1300,9 +1282,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
if (bestsme < INT_MAX)
{
if (!fullpixel)
- cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+ cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);
else
- vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+ vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);
}
}
@@ -1317,7 +1299,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
- labelyrate = rdcost_mbsegment_y(x, labels, i, &ts);
+ labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
rate += labelyrate;
this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
@@ -1329,12 +1311,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
- vp8_setup_temp_context(&tb, &ts.a[0], &ts.l[0], 4);
+
+ vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
}
}
- vp8_setup_temp_context(&t, &tb.a[0], &tb.l[0], 4);
+ vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost);
@@ -1377,49 +1362,23 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
bd->eob = beobs[i];
}
- // Trap cases where the best split mode has all vectors coded 0,0 (or all the same)
- if (FALSE)
- {
- int allsame = 1;
-
- for (i = 1; i < 16; i++)
- {
- if ((bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row))
- {
- allsame = 0;
- break;
- }
- }
-
- if (allsame)
- {
- best_segment_rd = INT_MAX;
- }
- }
-
*returntotrate = bsr;
*returndistortion = bsd;
*returnyrate = bestsegmentyrate;
-
-
// save partitions
labels = vp8_mbsplits[best_seg];
- x->e_mbd.mbmi.partitioning = best_seg;
- x->e_mbd.mbmi.partition_count = vp8_count_labels(labels);
+ x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
+ x->partition_info->count = vp8_mbsplit_count[best_seg];
- for (i = 0; i < x->e_mbd.mbmi.partition_count; i++)
+ for (i = 0; i < x->partition_info->count; i++)
{
int j;
- for (j = 0; j < 16; j++)
- {
- if (labels[j] == i)
- break;
- }
+ j = vp8_mbsplit_offset2[best_seg][i];
- x->e_mbd.mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
- x->e_mbd.mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
+ x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
+ x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
}
return best_segment_rd;
@@ -1433,6 +1392,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
MACROBLOCKD *xd = &x->e_mbd;
B_MODE_INFO best_bmodes[16];
MB_MODE_INFO best_mbmode;
+ PARTITION_INFO best_partition;
MV best_ref_mv;
MV mode_mv[MB_MODE_COUNT];
MB_PREDICTION_MODE this_mode;
@@ -1464,6 +1424,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
*returnintra = INT_MAX;
+ vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
+
cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame
x->skip = 0;
@@ -1517,9 +1479,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
vpx_memset(mode_mv, 0, sizeof(mode_mv));
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
- uv_intra_mode = x->e_mbd.mbmi.uv_mode;
+ uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
{
uvintra_eob = 0;
@@ -1541,7 +1503,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
//all_rds[mode_index] = -1;
//all_rates[mode_index] = -1;
//all_dist[mode_index] = -1;
- //intermodecost[mode_index] = -1;
+ //intermodecost[mode_index] = -1;
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index])
@@ -1563,31 +1525,34 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
this_mode = vp8_mode_order[mode_index];
- x->e_mbd.mbmi.mode = this_mode;
- x->e_mbd.mbmi.uv_mode = DC_PRED;
- x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+ x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
//Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
if (cpi->is_src_frame_alt_ref)
{
- if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+ if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
continue;
}
- if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+ if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
{
+ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
continue;
lf_or_gf = 0; // Local last frame vs Golden frame flag
// Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
+ x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+ x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+ x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
}
- else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+ else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
{
+ YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
// not supposed to reference gold frame
if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
@@ -1596,12 +1561,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
lf_or_gf = 1; // Local last frame vs Golden frame flag
// Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+ x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset;
+ x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
+ x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
}
- else if (x->e_mbd.mbmi.ref_frame == ALTREF_FRAME)
+ else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME)
{
+ YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
// not supposed to reference alt ref frame
if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
continue;
@@ -1612,19 +1579,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
lf_or_gf = 1; // Local last frame vs Golden frame flag
// Set up pointers for this macro block into the previous frame recon buffer
- x->e_mbd.pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
- x->e_mbd.pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
- x->e_mbd.pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+ x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset;
+ x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset;
+ x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset;
}
vp8_find_near_mvs(&x->e_mbd,
x->e_mbd.mode_info_context,
&mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
- mdcounts, x->e_mbd.mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+ mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
// Estimate the reference frame signaling cost and add it to the rolling cost variable.
- frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+ frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
rate2 += frame_cost;
if (this_mode <= B_PRED)
@@ -1691,9 +1658,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int breakout_rd = best_rd - frame_cost_rd;
int tmp_rd;
- if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+ if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ;
- else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+ else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ;
else
tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ;
@@ -1747,19 +1714,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
// trap cases where the 8x8s can be promoted to 8x16s or 16x8s
- if (0)//x->e_mbd.mbmi.partition_count == 4)
+ if (0)//x->partition_info->count == 4)
{
- if (x->e_mbd.mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mbmi.partition_bmi[1].mv.as_int
- && x->e_mbd.mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mbmi.partition_bmi[3].mv.as_int)
+ if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int
+ && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int)
{
const int *labels = vp8_mbsplits[2];
- x->e_mbd.mbmi.partitioning = 0;
+ x->e_mbd.mode_info_context->mbmi.partitioning = 0;
rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
- //rate -= x->inter_bmode_costs[ x->e_mbd.mbmi.partition_bmi[1]];
- //rate -= x->inter_bmode_costs[ x->e_mbd.mbmi.partition_bmi[3]];
- x->e_mbd.mbmi.partition_bmi[1] = x->e_mbd.mbmi.partition_bmi[2];
+ //rate -= x->inter_bmode_costs[ x->partition_info->bmi[1]];
+ //rate -= x->inter_bmode_costs[ x->partition_info->bmi[3]];
+ x->partition_info->bmi[1] = x->partition_info->bmi[2];
}
}
@@ -1769,14 +1736,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
case V_PRED:
case H_PRED:
case TM_PRED:
- x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
{
macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
rate2 += rate;
rate_y = rate;
distortion2 += distortion;
- rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+ rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
rate2 += uv_intra_rate;
rate_uv = uv_intra_rate_tokenonly;
distortion2 += uv_intra_distortion;
@@ -1811,13 +1778,13 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (cpi->sf.search_method == HEX)
{
- bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
}
else
{
- bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+ bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -1836,7 +1803,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
num00--;
else
{
- thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+ thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
if (thissme < bestsme)
{
@@ -1873,7 +1840,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
{
int sadpb = x->sadperbit16 >> 2;
- thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost);
+ thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
}
// Barrier threshold to initiating full search
@@ -1898,7 +1865,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
if (bestsme < INT_MAX)
// cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost); // normal mvc=11
- cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost);
+ cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -2082,7 +2049,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
//all_rates[mode_index] = rate2;
//all_dist[mode_index] = distortion2;
- if ((x->e_mbd.mbmi.ref_frame == INTRA_FRAME) && (this_rd < *returnintra))
+ if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && (this_rd < *returnintra))
{
*returnintra = this_rd ;
}
@@ -2092,17 +2059,18 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
{
// Note index of best mode so far
best_mode_index = mode_index;
- x->e_mbd.mbmi.force_no_skip = force_no_skip;
+ x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
if (this_mode <= B_PRED)
{
- x->e_mbd.mbmi.uv_mode = uv_intra_mode;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
}
*returnrate = rate2;
*returndistortion = distortion2;
best_rd = this_rd;
- vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
for (i = 0; i < 16; i++)
{
@@ -2183,28 +2151,30 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
best_mbmode.partitioning = 0;
best_mbmode.dc_diff = 0;
- vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
for (i = 0; i < 16; i++)
{
vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
}
- x->e_mbd.mbmi.mv.as_int = 0;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
return best_rd;
}
// macroblock modes
- vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
for (i = 0; i < 16; i++)
{
vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
}
- x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+ x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
return best_rd;
}
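The motion-search calls in the rdopt.c hunks above now take a single per-block-size kernel bundle, &cpi->fn_ptr[BLOCK_16X16], in place of the loose cpi->fn_ptr.sdf/.vf/.svf members. A minimal sketch of the resulting call shape, assuming the vp8_variance_fn_ptr_t fields declared in the variance.h hunk further below (src, ref, and the strides are placeholders):

    const vp8_variance_fn_ptr_t *fp = &cpi->fn_ptr[BLOCK_16X16];
    unsigned int sse;
    /* full-pel SAD, using the same no-early-out sentinel as the C code */
    unsigned int sad = fp->sdf(src, src_stride, ref, ref_stride, 0x7fffffff);
    /* variance of the same 16x16 block */
    unsigned int var = fp->vf(src, src_stride, ref, ref_stride, &sse);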
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index c6eae4b92..fb74dd431 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 74c6bd76a..5eaca5935 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -1,19 +1,20 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
unsigned int vp8_sad16x16_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad)
{
@@ -38,9 +39,9 @@ unsigned int vp8_sad16x16_c(
static __inline
unsigned int sad_mx_n_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int m,
int n)
@@ -65,9 +66,9 @@ unsigned int sad_mx_n_c(
unsigned int vp8_sad8x8_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad)
{
@@ -77,9 +78,9 @@ unsigned int vp8_sad8x8_c(
unsigned int vp8_sad16x8_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad)
{
@@ -90,9 +91,9 @@ unsigned int vp8_sad16x8_c(
unsigned int vp8_sad8x16_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad)
{
@@ -102,9 +103,9 @@ unsigned int vp8_sad8x16_c(
unsigned int vp8_sad4x4_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
int max_sad)
{
@@ -113,9 +114,9 @@ unsigned int vp8_sad4x4_c(
}
void vp8_sad16x16x3_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
unsigned int *sad_array
)
@@ -125,10 +126,28 @@ void vp8_sad16x16x3_c(
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
+void vp8_sad16x16x8_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+)
+{
+ sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad16x8x3_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
unsigned int *sad_array
)
@@ -138,10 +157,28 @@ void vp8_sad16x8x3_c(
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
+void vp8_sad16x8x8_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+)
+{
+ sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad8x8x3_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
unsigned int *sad_array
)
@@ -151,10 +188,28 @@ void vp8_sad8x8x3_c(
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
+void vp8_sad8x8x8_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+)
+{
+ sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad8x16x3_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
unsigned int *sad_array
)
@@ -164,10 +219,28 @@ void vp8_sad8x16x3_c(
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
+void vp8_sad8x16x8_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+)
+{
+ sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad4x4x3_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride,
unsigned int *sad_array
)
@@ -177,8 +250,26 @@ void vp8_sad4x4x3_c(
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
+void vp8_sad4x4x8_c(
+ const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+)
+{
+ sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad16x16x4d_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr[],
int ref_stride,
@@ -192,7 +283,7 @@ void vp8_sad16x16x4d_c(
}
void vp8_sad16x8x4d_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr[],
int ref_stride,
@@ -206,7 +297,7 @@ void vp8_sad16x8x4d_c(
}
void vp8_sad8x8x4d_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr[],
int ref_stride,
@@ -220,7 +311,7 @@ void vp8_sad8x8x4d_c(
}
void vp8_sad8x16x4d_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr[],
int ref_stride,
@@ -234,7 +325,7 @@ void vp8_sad8x16x4d_c(
}
void vp8_sad4x4x4d_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr[],
int ref_stride,
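Each new vp8_sad*x*x8_c variant added above evaluates eight horizontally adjacent reference candidates in one call and writes unsigned short results, which lets a caller sweep a row of positions cheaply. A small usage sketch, assuming src and ref point at valid, in-bounds pixel rows:

    unsigned short sads[8];
    int i, best = 0;
    vp8_sad16x16x8_c(src, src_stride, ref, ref_stride, sads);
    /* sads[i] holds the SAD at horizontal offset ref + i */
    for (i = 1; i < 8; i++)
        if (sads[i] < sads[best])
            best = i;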
diff --git a/vp8/common/segmentation_common.c b/vp8/encoder/segmentation.c
index 72b8c874b..fc0967db3 100644
--- a/vp8/common/segmentation_common.c
+++ b/vp8/encoder/segmentation.c
@@ -1,29 +1,30 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
-#include "segmentation_common.h"
+#include "segmentation.h"
#include "vpx_mem/vpx_mem.h"
-void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
+void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
{
int mb_row, mb_col;
MODE_INFO *this_mb_mode_info = cm->mi;
- xd->gf_active_ptr = (signed char *)cm->gf_active_flags;
+ x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
{
// Reset Gf useage monitors
- vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
- cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+ vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+ cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
}
else
{
@@ -39,19 +40,19 @@ void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
// else if using non 0,0 motion or intra modes then clear flag if it is currently set
if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
{
- if (*(xd->gf_active_ptr) == 0)
+ if (*(x->gf_active_ptr) == 0)
{
- *(xd->gf_active_ptr) = 1;
- cm->gf_active_count ++;
+ *(x->gf_active_ptr) = 1;
+ cpi->gf_active_count ++;
}
}
- else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr))
+ else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(x->gf_active_ptr))
{
- *(xd->gf_active_ptr) = 0;
- cm->gf_active_count--;
+ *(x->gf_active_ptr) = 0;
+ cpi->gf_active_count--;
}
- xd->gf_active_ptr++; // Step onto next entry
+ x->gf_active_ptr++; // Step onto next entry
this_mb_mode_info++; // skip to next mb
}
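With the GF usage map now owned by the encoder context rather than VP8_COMMON, the updated entry point takes the compressor and a MACROBLOCK instead of a MACROBLOCKD. A sketch of the new call, assuming encoder code that holds a VP8_COMP *cpi:

    vp8_update_gf_useage_maps(cpi, &cpi->common, &cpi->mb);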
diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h
new file mode 100644
index 000000000..216e194c2
--- /dev/null
+++ b/vp8/encoder/segmentation.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include "blockd.h"
+#include "onyx_int.h"
+
+extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index df214a89f..4ebcba1a1 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
new file mode 100644
index 000000000..fd5dd7ede
--- /dev/null
+++ b/vp8/encoder/temporal_filter.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "onyxc_int.h"
+#include "onyx_int.h"
+#include "systemdependent.h"
+#include "quantize.h"
+#include "alloccommon.h"
+#include "mcomp.h"
+#include "firstpass.h"
+#include "psnr.h"
+#include "vpx_scale/vpxscale.h"
+#include "extend.h"
+#include "ratectrl.h"
+#include "quant_common.h"
+#include "segmentation.h"
+#include "g_common.h"
+#include "vpx_scale/yv12extend.h"
+#include "postproc.h"
+#include "vpx_mem/vpx_mem.h"
+#include "swapyv12buffer.h"
+#include "threading.h"
+#include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
+#include <math.h>
+#include <limits.h>
+
+#define ALT_REF_MC_ENABLED 1 // enable/disable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1 // enable/disable subpel in MC AltRef filtering
+
+#define USE_FILTER_LUT 1
+#if VP8_TEMPORAL_ALT_REF
+
+#if USE_FILTER_LUT
+static int modifier_lut[7][19] =
+{
+ // Strength=0
+ {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Strength=1
+ {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Strength=2
+ {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Strength=3
+ {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Strength=4
+ {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ // Strength=5
+ {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
+ // Strength=6
+ {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
+};
+#endif
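+// Taking the non-LUT path below as the reference, each row appears to
+// tabulate m = 16 - min(16, 3*d*d / 2^strength) for pixel difference d,
+// with the divide rounded to nearest rather than truncated ("improves
+// precision"); e.g. strength 0, d = 2: 16 - 12 = 4 = modifier_lut[0][2].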
+static void build_predictors_mb
+(
+ MACROBLOCKD *x,
+ unsigned char *y_mb_ptr,
+ unsigned char *u_mb_ptr,
+ unsigned char *v_mb_ptr,
+ int stride,
+ int mv_row,
+ int mv_col,
+ unsigned char *pred
+)
+{
+ int offset;
+ unsigned char *yptr, *uptr, *vptr;
+
+ // Y
+ yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+ if ((mv_row | mv_col) & 7)
+ {
+// vp8_sixtap_predict16x16_c(yptr, stride,
+// mv_col & 7, mv_row & 7, &pred[0], 16);
+ x->subpixel_predict16x16(yptr, stride,
+ mv_col & 7, mv_row & 7, &pred[0], 16);
+ }
+ else
+ {
+ //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
+ }
+
+ // U & V
+ mv_row >>= 1;
+ mv_col >>= 1;
+ stride >>= 1;
+ offset = (mv_row >> 3) * stride + (mv_col >> 3);
+ uptr = u_mb_ptr + offset;
+ vptr = v_mb_ptr + offset;
+
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict8x8(uptr, stride,
+ mv_col & 7, mv_row & 7, &pred[256], 8);
+ x->subpixel_predict8x8(vptr, stride,
+ mv_col & 7, mv_row & 7, &pred[320], 8);
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
+ }
+}
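+// build_predictors_mb treats mv_row/mv_col as 1/8-pel offsets: (mv >> 3)
+// gives the integer pixel step and the low 3 bits select the subpel
+// filter phase; halving the vector rescales it for the half-resolution
+// chroma planes.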
+static void apply_temporal_filter
+(
+ unsigned char *frame1,
+ unsigned int stride,
+ unsigned char *frame2,
+ unsigned int block_size,
+ int strength,
+ int filter_weight,
+ unsigned int *accumulator,
+ unsigned int *count
+)
+{
+ int i, j, k;
+ int modifier;
+ int byte = 0;
+
+#if USE_FILTER_LUT
+ int *lut = modifier_lut[strength];
+#endif
+
+ for (i = 0, k = 0; i < block_size; i++)
+ {
+ for (j = 0; j < block_size; j++, k++)
+ {
+
+ int src_byte = frame1[byte];
+ int pixel_value = *frame2++;
+
+#if USE_FILTER_LUT
+ // LUT implementation: improves the precision of the filter
+ modifier = abs(src_byte-pixel_value);
+ modifier = modifier>18 ? 0 : lut[modifier];
+#else
+ modifier = src_byte;
+ modifier -= pixel_value;
+ modifier *= modifier;
+ modifier >>= strength;
+ modifier *= 3;
+
+ if (modifier > 16)
+ modifier = 16;
+
+ modifier = 16 - modifier;
+#endif
+ modifier *= filter_weight;
+
+ count[k] += modifier;
+ accumulator[k] += modifier * pixel_value;
+
+ byte++;
+ }
+
+ byte += stride - block_size;
+ }
+}
+
+#if ALT_REF_MC_ENABLED
+static int dummy_cost[2*mv_max+1];
+
+static int find_matching_mb
+(
+ VP8_COMP *cpi,
+ YV12_BUFFER_CONFIG *arf_frame,
+ YV12_BUFFER_CONFIG *frame_ptr,
+ int mb_offset,
+ int error_thresh
+)
+{
+ MACROBLOCK *x = &cpi->mb;
+ int thissme;
+ int step_param;
+ int further_steps;
+ int n = 0;
+ int sadpb = x->sadperbit16;
+ int bestsme = INT_MAX;
+ int num00 = 0;
+
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &x->e_mbd.block[0];
+ MV best_ref_mv1 = {0,0};
+
+ int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+ int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+
+ // Save input state
+ unsigned char **base_src = b->base_src;
+ int src = b->src;
+ int src_stride = b->src_stride;
+ unsigned char **base_pre = d->base_pre;
+ int pre = d->pre;
+ int pre_stride = d->pre_stride;
+
+ // Set up frame pointers
+ b->base_src = &arf_frame->y_buffer;
+ b->src_stride = arf_frame->y_stride;
+ b->src = mb_offset;
+
+ d->base_pre = &frame_ptr->y_buffer;
+ d->pre_stride = frame_ptr->y_stride;
+ d->pre = mb_offset;
+
+ // Choose the initial step size and the number of further
+ // diamond search steps based on encode speed
+ if (cpi->Speed < 8)
+ {
+ step_param = cpi->sf.first_step +
+ ((cpi->Speed > 5) ? 1 : 0);
+ further_steps =
+ (cpi->sf.max_step_search_steps - 1) - step_param;
+ }
+ else
+ {
+ step_param = cpi->sf.first_step + 2;
+ further_steps = 0;
+ }
+
+ if (1/*cpi->sf.search_method == HEX*/)
+ {
+ // TODO: Check that the 16x16 vf & sdf are selected here
+ bestsme = vp8_hex_search(x, b, d,
+ &best_ref_mv1, &d->bmi.mv.as_mv,
+ step_param,
+ sadpb/*x->errorperbit*/,
+ &num00, &cpi->fn_ptr[BLOCK_16X16],
+ mvsadcost, mvcost);
+ }
+ else
+ {
+ int mv_x, mv_y;
+
+ bestsme = cpi->diamond_search_sad(x, b, d,
+ &best_ref_mv1, &d->bmi.mv.as_mv,
+ step_param,
+ sadpb / 2/*x->errorperbit*/,
+ &num00, &cpi->fn_ptr[BLOCK_16X16],
+ mvsadcost, mvcost); //sadpb < 9
+
+ // Record the best vector so far; the refinement loop below
+ // restores it when a step fails to improve, so it must not
+ // be left uninitialized
+ mv_y = d->bmi.mv.as_mv.row;
+ mv_x = d->bmi.mv.as_mv.col;
+
+ // Further step/diamond searches as necessary
+ n = 0;
+ //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+ n = num00;
+ num00 = 0;
+
+ while (n < further_steps)
+ {
+ n++;
+
+ if (num00)
+ num00--;
+ else
+ {
+ thissme = cpi->diamond_search_sad(x, b, d,
+ &best_ref_mv1, &d->bmi.mv.as_mv,
+ step_param + n,
+ sadpb / 4/*x->errorperbit*/,
+ &num00, &cpi->fn_ptr[BLOCK_16X16],
+ mvsadcost, mvcost); //sadpb = 9
+
+ if (thissme < bestsme)
+ {
+ bestsme = thissme;
+ mv_y = d->bmi.mv.as_mv.row;
+ mv_x = d->bmi.mv.as_mv.col;
+ }
+ else
+ {
+ d->bmi.mv.as_mv.row = mv_y;
+ d->bmi.mv.as_mv.col = mv_x;
+ }
+ }
+ }
+ }
+
+#if ALT_REF_SUBPEL_ENABLED
+ // Try sub-pixel MC?
+ //if (bestsme > error_thresh && bestsme < INT_MAX)
+ {
+ bestsme = cpi->find_fractional_mv_step(x, b, d,
+ &d->bmi.mv.as_mv, &best_ref_mv1,
+ x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
+ cpi->mb.mvcost);
+ }
+#endif
+
+ // Restore input state
+ b->base_src = base_src;
+ b->src = src;
+ b->src_stride = src_stride;
+ d->base_pre = base_pre;
+ d->pre = pre;
+ d->pre_stride = pre_stride;
+
+ return bestsme;
+}
+#endif
+
+static void vp8cx_temp_blur1_c
+(
+ VP8_COMP *cpi,
+ int frame_count,
+ int alt_ref_index,
+ int strength
+)
+{
+ int byte;
+ int frame;
+ int mb_col, mb_row;
+ unsigned int filter_weight[MAX_LAG_BUFFERS];
+ unsigned char *mm_ptr = cpi->fp_motion_map;
+ int cols = cpi->common.mb_cols;
+ int rows = cpi->common.mb_rows;
+ int MBs = cpi->common.MBs;
+ int mb_y_offset = 0;
+ int mb_uv_offset = 0;
+ unsigned int accumulator[384];
+ unsigned int count[384];
+ MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+ YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+ unsigned char *dst1, *dst2;
+ DECLARE_ALIGNED(16, unsigned char, predictor[384]);
+
+ // Save input state
+ unsigned char *y_buffer = mbd->pre.y_buffer;
+ unsigned char *u_buffer = mbd->pre.u_buffer;
+ unsigned char *v_buffer = mbd->pre.v_buffer;
+
+ if (!cpi->use_weighted_temporal_filter)
+ {
+ // Temporal filtering is unweighted
+ for (frame = 0; frame < frame_count; frame++)
+ filter_weight[frame] = 1;
+ }
+
+ for (mb_row = 0; mb_row < rows; mb_row++)
+ {
+#if ALT_REF_MC_ENABLED
+ // Search extent reduced by 3 to allow for the 6-tap filter and the smaller UMV border
+ cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+ cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+ + (VP8BORDERINPIXELS - 19);
+#endif
+
+ for (mb_col = 0; mb_col < cols; mb_col++)
+ {
+ int i, j, k, w;
+ int weight_cap;
+ int stride;
+
+ vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
+ vpx_memset(count, 0, 384*sizeof(unsigned int));
+
+#if ALT_REF_MC_ENABLED
+ // Search extent reduced by 3 to allow for the 6-tap filter and the smaller UMV border
+ cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+ cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+ + (VP8BORDERINPIXELS - 19);
+#endif
+
+ // Read & process macroblock weights from motion map
+ if (cpi->use_weighted_temporal_filter)
+ {
+ weight_cap = 2;
+
+ for (frame = alt_ref_index-1; frame >= 0; frame--)
+ {
+ w = *(mm_ptr + (frame+1)*MBs);
+ filter_weight[frame] = w < weight_cap ? w : weight_cap;
+ weight_cap = w;
+ }
+
+ filter_weight[alt_ref_index] = 2;
+
+ weight_cap = 2;
+
+ for (frame = alt_ref_index+1; frame < frame_count; frame++)
+ {
+ w = *(mm_ptr + frame*MBs);
+ filter_weight[frame] = w < weight_cap ? w : weight_cap;
+ weight_cap = w;
+ }
+
+ }
+
+ for (frame = 0; frame < frame_count; frame++)
+ {
+ int err;
+
+ if (cpi->frames[frame] == NULL)
+ continue;
+
+ mbd->block[0].bmi.mv.as_mv.row = 0;
+ mbd->block[0].bmi.mv.as_mv.col = 0;
+
+#if ALT_REF_MC_ENABLED
+ //if (filter_weight[frame] == 0)
+ {
+#define THRESH_LOW 10000
+#define THRESH_HIGH 20000
+
+ // Correlation has been lost, so try MC
+ err = find_matching_mb ( cpi,
+ cpi->frames[alt_ref_index],
+ cpi->frames[frame],
+ mb_y_offset,
+ THRESH_LOW );
+
+ if (filter_weight[frame] < 2)
+ {
+ // Set weight depending on error
+ filter_weight[frame] = err<THRESH_LOW
+ ? 2 : err<THRESH_HIGH ? 1 : 0;
+ }
+ }
+#endif
+ if (filter_weight[frame] != 0)
+ {
+ // Construct the predictors
+ build_predictors_mb (
+ mbd,
+ cpi->frames[frame]->y_buffer + mb_y_offset,
+ cpi->frames[frame]->u_buffer + mb_uv_offset,
+ cpi->frames[frame]->v_buffer + mb_uv_offset,
+ cpi->frames[frame]->y_stride,
+ mbd->block[0].bmi.mv.as_mv.row,
+ mbd->block[0].bmi.mv.as_mv.col,
+ predictor );
+
+ // Apply the filter (YUV)
+ apply_temporal_filter ( f->y_buffer + mb_y_offset,
+ f->y_stride,
+ predictor,
+ 16,
+ strength,
+ filter_weight[frame],
+ accumulator,
+ count );
+
+ apply_temporal_filter ( f->u_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 256,
+ 8,
+ strength,
+ filter_weight[frame],
+ accumulator + 256,
+ count + 256 );
+
+ apply_temporal_filter ( f->v_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 320,
+ 8,
+ strength,
+ filter_weight[frame],
+ accumulator + 320,
+ count + 320 );
+ }
+ }
+
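+ // Each output pixel below is accumulator[k]/count[k], computed with
+ // the reciprocal table: assuming cpi->fixed_divide[c] == (1 << 19) / c,
+ // (acc + c/2) * fixed_divide[c] >> 19 is a rounded integer division.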
+ // Normalize filter output to produce AltRef frame
+ dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+ byte = mb_y_offset;
+ for (i = 0, k = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++, k++)
+ {
+ unsigned int pval = accumulator[k] + (count[k] >> 1);
+ pval *= cpi->fixed_divide[count[k]];
+ pval >>= 19;
+
+ dst1[byte] = (unsigned char)pval;
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - 16;
+ }
+
+ dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+ byte = mb_uv_offset;
+ for (i = 0, k = 256; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++, k++)
+ {
+ int m = k + 64;
+
+ // U
+ unsigned int pval = accumulator[k] + (count[k] >> 1);
+ pval *= cpi->fixed_divide[count[k]];
+ pval >>= 19;
+ dst1[byte] = (unsigned char)pval;
+
+ // V
+ pval = accumulator[m] + (count[m] >> 1);
+ pval *= cpi->fixed_divide[count[m]];
+ pval >>= 19;
+ dst2[byte] = (unsigned char)pval;
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - 8;
+ }
+
+ mm_ptr++;
+ mb_y_offset += 16;
+ mb_uv_offset += 8;
+ }
+
+ mb_y_offset += 16*f->y_stride-f->y_width;
+ mb_uv_offset += 8*f->uv_stride-f->uv_width;
+ }
+
+ // Restore input state
+ mbd->pre.y_buffer = y_buffer;
+ mbd->pre.u_buffer = u_buffer;
+ mbd->pre.v_buffer = v_buffer;
+}
+
+void vp8cx_temp_filter_c
+(
+ VP8_COMP *cpi
+)
+{
+ int frame = 0;
+
+ int num_frames_backward = 0;
+ int num_frames_forward = 0;
+ int frames_to_blur_backward = 0;
+ int frames_to_blur_forward = 0;
+ int frames_to_blur = 0;
+ int start_frame = 0;
+ unsigned int filtered = 0;
+
+ int strength = cpi->oxcf.arnr_strength;
+
+ int blur_type = cpi->oxcf.arnr_type;
+
+ int max_frames = cpi->active_arnr_frames;
+
+ num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
+
+ if (num_frames_backward < 0)
+ num_frames_backward += cpi->oxcf.lag_in_frames;
+
+ num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
+
+ switch (blur_type)
+ {
+ case 1:
+ /////////////////////////////////////////
+ // Backward Blur
+
+ frames_to_blur_backward = num_frames_backward;
+
+ if (frames_to_blur_backward >= max_frames)
+ frames_to_blur_backward = max_frames - 1;
+
+ frames_to_blur = frames_to_blur_backward + 1;
+ break;
+
+ case 2:
+ /////////////////////////////////////////
+ // Forward Blur
+
+ frames_to_blur_forward = num_frames_forward;
+
+ if (frames_to_blur_forward >= max_frames)
+ frames_to_blur_forward = max_frames - 1;
+
+ frames_to_blur = frames_to_blur_forward + 1;
+ break;
+
+ case 3:
+ default:
+ /////////////////////////////////////////
+ // Center Blur
+ frames_to_blur_forward = num_frames_forward;
+ frames_to_blur_backward = num_frames_backward;
+
+ if (frames_to_blur_forward > frames_to_blur_backward)
+ frames_to_blur_forward = frames_to_blur_backward;
+
+ if (frames_to_blur_backward > frames_to_blur_forward)
+ frames_to_blur_backward = frames_to_blur_forward;
+
+ // When max_frames is even we have 1 more frame backward than forward
+ if (frames_to_blur_forward > (max_frames - 1) / 2)
+ frames_to_blur_forward = ((max_frames - 1) / 2);
+
+ if (frames_to_blur_backward > (max_frames / 2))
+ frames_to_blur_backward = (max_frames / 2);
+
+ frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+ break;
+ }
+
+ start_frame = (cpi->last_alt_ref_sei
+ + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
+
+#ifdef DEBUGFWG
+ // DEBUG FWG
+ printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d\n"
+ , max_frames
+ , num_frames_backward
+ , num_frames_forward
+ , frames_to_blur
+ , frames_to_blur_backward
+ , frames_to_blur_forward
+ , cpi->source_encode_index
+ , cpi->last_alt_ref_sei
+ , start_frame);
+#endif
+
+ // Set up frame pointers; a NULL entry marks a frame not included in the filter
+ vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
+ for (frame = 0; frame < frames_to_blur; frame++)
+ {
+ int which_buffer = start_frame - frame;
+
+ if (which_buffer < 0)
+ which_buffer += cpi->oxcf.lag_in_frames;
+
+ cpi->frames[frames_to_blur-1-frame]
+ = &cpi->src_buffer[which_buffer].source_buffer;
+ }
+
+ vp8cx_temp_blur1_c (
+ cpi,
+ frames_to_blur,
+ frames_to_blur_backward,
+ strength );
+}
+#endif
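In the center-blur case above, the window is split around the alt-ref source frame, and because integer division truncates, an even max_frames leaves one extra backward frame, as the case-3 comment notes. Checking the arithmetic for two window sizes, assuming enough lagged frames are available on both sides:

    /* max_frames = 7: forward = (7 - 1) / 2 = 3, backward = 7 / 2 = 3, total = 7 */
    /* max_frames = 6: forward = (6 - 1) / 2 = 2, backward = 6 / 2 = 3, total = 6 */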
diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h
new file mode 100644
index 000000000..f70e8c01e
--- /dev/null
+++ b/vp8/encoder/temporal_filter.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_VP8_TEMPORAL_FILTER_H
+#define __INC_VP8_TEMPORAL_FILTER_H
+
+#include "onyx_int.h"
+
+void vp8cx_temp_filter_c(VP8_COMP *cpi);
+
+#endif // __INC_VP8_TEMPORAL_FILTER_H
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 33ddd64e7..e4da83379 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,12 +24,12 @@
_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
#endif
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+void vp8_fix_contexts(MACROBLOCKD *x);
-TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-TOKENEXTRA *vp8_dct_value_tokens_ptr;
+TOKENVALUE vp8_dct_value_tokens[DCT_MAX_VALUE*2];
+const TOKENVALUE *vp8_dct_value_tokens_ptr;
int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-int *vp8_dct_value_cost_ptr;
+const int *vp8_dct_value_cost_ptr;
#if 0
int skip_true_count = 0;
int skip_false_count = 0;
@@ -36,7 +37,7 @@ int skip_false_count = 0;
static void fill_value_tokens()
{
- TOKENEXTRA *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
+ TOKENVALUE *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
vp8_extra_bit_struct *const e = vp8_extra_bits;
int i = -DCT_MAX_VALUE;
@@ -196,86 +197,40 @@ static void tokenize1st_order_b
*a = *l = pt;
}
-#if 0
-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
-{
- //int i;
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
- int plane_type;
- int b;
- TOKENEXTRA *start = *t;
- TOKENEXTRA *tp = *t;
-
- x->mbmi.dc_diff = 1;
- vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
+static int mb_is_skippable(MACROBLOCKD *x)
+{
+ int has_y2_block;
+ int skip = 1;
+ int i = 0;
- if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+ has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
+ && x->mode_info_context->mbmi.mode != SPLITMV);
+ if (has_y2_block)
{
- plane_type = 3;
- }
- else
- {
- tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
- A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
- plane_type = 0;
-
+ for (i = 0; i < 16; i++)
+ skip &= (x->block[i].eob < 2);
}
- for (b = 0; b < 16; b++)
- tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
- for (b = 16; b < 24; b++)
- tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
- if (cpi->common.mb_no_coeff_skip)
- {
- x->mbmi.mb_skip_coeff = 1;
-
- while ((tp != *t) && x->mbmi.mb_skip_coeff)
- {
- x->mbmi.mb_skip_coeff = (x->mbmi.mb_skip_coeff && (tp->Token == DCT_EOB_TOKEN));
- tp ++;
- }
-
- if (x->mbmi.mb_skip_coeff == 1)
- {
- x->mbmi.dc_diff = 0;
- //redo the coutnts
- vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts));
+ for (; i < 24 + has_y2_block; i++)
+ skip &= (!x->block[i].eob);
- *t = start;
- cpi->skip_true_count++;
+ return skip;
+}
- //skip_true_count++;
- }
- else
- {
- cpi->skip_false_count++;
- //skip_false_count++;
- }
- }
-}
-#else
void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{
- //int i;
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type;
int b;
TOKENEXTRA *start = *t;
TOKENEXTRA *tp = *t;
- x->mbmi.dc_diff = 1;
+ x->mode_info_context->mbmi.dc_diff = 1;
#if 0
@@ -290,7 +245,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
#if 1
- if (x->mbmi.mb_skip_coeff)
+ x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x);
+ if (x->mode_info_context->mbmi.mb_skip_coeff)
{
cpi->skip_true_count++;
@@ -299,13 +255,13 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
vp8_stuff_mb(cpi, x, t) ;
else
{
- vp8_fix_contexts(cpi, x);
+ vp8_fix_contexts(x);
}
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- x->mbmi.dc_diff = 0;
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+ x->mode_info_context->mbmi.dc_diff = 0;
else
- x->mbmi.dc_diff = 1;
+ x->mode_info_context->mbmi.dc_diff = 1;
return;
@@ -314,59 +270,30 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
cpi->skip_false_count++;
#endif
#if 0
-
- if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
- {
- int i, skip = 1;
-
- for (i = 0; i < 24; i++)
- skip &= (!x->block[i].eob);
-
- if (skip != x->mbmi.mb_skip_coeff)
- skip += 0;
-
- x->mbmi.mb_skip_coeff = skip;
- }
- else
- {
- int i, skip = 1;
-
- for (i = 0; i < 16; i++)
- skip &= (x->block[i].eob < 2);
-
- for (i = 16; i < 25; i++)
- skip &= (!x->block[i].eob);
-
- if (skip != x->mbmi.mb_skip_coeff)
- skip += 0;
-
- x->mbmi.mb_skip_coeff = skip;
- }
-
vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
#endif
- if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+ if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV)
{
plane_type = 3;
}
else
{
tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
- A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+ A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
}
for (b = 0; b < 16; b++)
tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+ A + vp8_block2above[b],
+ L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+ A + vp8_block2above[b],
+ L + vp8_block2left[b], cpi);
#if 0
@@ -405,7 +332,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
#endif
}
-#endif
+
#ifdef ENTROPY_STATS
@@ -580,57 +507,45 @@ void stuff1st_order_buv
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{
- //int i;
- ENTROPY_CONTEXT **const A = x->above_context;
- ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+ ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type;
int b;
stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
- A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+ A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
- x->mbmi.dc_diff = 0;
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+ x->mode_info_context->mbmi.dc_diff = 0;
else
- x->mbmi.dc_diff = 1;
+ x->mode_info_context->mbmi.dc_diff = 1;
for (b = 0; b < 16; b++)
stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+ A + vp8_block2above[b],
+ L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
- A[vp8_block2context[b]] + vp8_block2above[b],
- L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+ A + vp8_block2above[b],
+ L + vp8_block2left[b], cpi);
}
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x)
+void vp8_fix_contexts(MACROBLOCKD *x)
{
- x->left_context[Y1CONTEXT][0] = 0;
- x->left_context[Y1CONTEXT][1] = 0;
- x->left_context[Y1CONTEXT][2] = 0;
- x->left_context[Y1CONTEXT][3] = 0;
- x->left_context[UCONTEXT][0] = 0;
- x->left_context[VCONTEXT][0] = 0;
- x->left_context[UCONTEXT][1] = 0;
- x->left_context[VCONTEXT][1] = 0;
-
- x->above_context[Y1CONTEXT][0] = 0;
- x->above_context[Y1CONTEXT][1] = 0;
- x->above_context[Y1CONTEXT][2] = 0;
- x->above_context[Y1CONTEXT][3] = 0;
- x->above_context[UCONTEXT][0] = 0;
- x->above_context[VCONTEXT][0] = 0;
- x->above_context[UCONTEXT][1] = 0;
- x->above_context[VCONTEXT][1] = 0;
-
- if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ /* Clear entropy contexts for Y2 blocks */
+ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{
- x->left_context[Y2CONTEXT][0] = 0;
- x->above_context[Y2CONTEXT][0] = 0;
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
+ else
+ {
+ vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+ vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+ }
+
}
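The rewritten vp8_fix_contexts depends on the Y2 entropy context being the last byte of ENTROPY_CONTEXT_PLANES: the sizeof-1 memsets clear every plane except Y2 for B_PRED/SPLITMV macroblocks, which carry no second-order block. A sketch of the layout this assumes (the real definition lives in the common headers):

    typedef struct
    {
        ENTROPY_CONTEXT y1[4];   /* 4 luma columns */
        ENTROPY_CONTEXT u[2];    /* 2 U columns */
        ENTROPY_CONTEXT v[2];    /* 2 V columns */
        ENTROPY_CONTEXT y2[1];   /* second-order block; must stay last */
    } ENTROPY_CONTEXT_PLANES;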
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 02aacc222..01e8ec6d7 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -18,6 +19,12 @@ void vp8_tokenize_initialize();
typedef struct
{
+ short Token;
+ short Extra;
+} TOKENVALUE;
+
+typedef struct
+{
int Token;
int Extra;
const vp8_prob *context_tree;
@@ -34,5 +41,11 @@ void print_context_counters();
extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
#endif
+extern const int *vp8_dct_value_cost_ptr;
+/* TODO: The Token field should be broken out into a separate char array to
+ * improve cache locality, since it's needed for costing when the rest of the
+ * fields are not.
+ */
+extern const TOKENVALUE *vp8_dct_value_tokens_ptr;
#endif /* tokenize_h */
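fill_value_tokens in tokenize.c builds these tables centered at DCT_MAX_VALUE, so a signed coefficient can index the exported pointers directly; a minimal lookup sketch, under the assumption that the _ptr globals are set to the centered addresses:

    /* v is a signed DCT coefficient, |v| < DCT_MAX_VALUE */
    short token = vp8_dct_value_tokens_ptr[v].Token;  /* token class for v */
    short extra = vp8_dct_value_tokens_ptr[v].Extra;  /* extra-bits payload */
    int   bits  = vp8_dct_value_cost_ptr[v];          /* precomputed cost */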
diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c
index e398044db..03967c835 100644
--- a/vp8/encoder/treewriter.c
+++ b/vp8/encoder/treewriter.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 05ac74cb7..88096d875 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index b3b55c319..5befd3b86 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,9 +15,9 @@
#define prototype_sad(sym)\
unsigned int (sym)\
(\
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
- unsigned char *ref_ptr, \
+ const unsigned char *ref_ptr, \
int ref_stride, \
int max_sad\
)
@@ -24,17 +25,27 @@
#define prototype_sad_multi_same_address(sym)\
void (sym)\
(\
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
- unsigned char *ref_ptr, \
+ const unsigned char *ref_ptr, \
int ref_stride, \
unsigned int *sad_array\
)
+#define prototype_sad_multi_same_address_1(sym)\
+ void (sym)\
+ (\
+ const unsigned char *src_ptr, \
+ int source_stride, \
+ const unsigned char *ref_ptr, \
+ int ref_stride, \
+ unsigned short *sad_array\
+ )
+
#define prototype_sad_multi_dif_address(sym)\
void (sym)\
(\
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
unsigned char *ref_ptr[4], \
int ref_stride, \
@@ -44,9 +55,9 @@
#define prototype_variance(sym) \
unsigned int (sym) \
(\
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
- unsigned char *ref_ptr, \
+ const unsigned char *ref_ptr, \
int ref_stride, \
unsigned int *sse\
)
@@ -54,9 +65,9 @@
#define prototype_variance2(sym) \
unsigned int (sym) \
(\
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
- unsigned char *ref_ptr, \
+ const unsigned char *ref_ptr, \
int ref_stride, \
unsigned int *sse,\
int *sum\
@@ -65,17 +76,17 @@
#define prototype_subpixvariance(sym) \
unsigned int (sym) \
( \
- unsigned char *src_ptr, \
+ const unsigned char *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
- unsigned char *ref_ptr, \
+ const unsigned char *ref_ptr, \
int Refstride, \
unsigned int *sse \
);
-#define prototype_getmbss(sym) unsigned int (sym)(short *)
+#define prototype_getmbss(sym) unsigned int (sym)(const short *)
#if ARCH_X86 || ARCH_X86_64
#include "x86/variance_x86.h"
@@ -137,6 +148,31 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
#endif
extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
+#ifndef vp8_variance_sad16x16x8
+#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
+
+#ifndef vp8_variance_sad16x8x8
+#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
+
+#ifndef vp8_variance_sad8x8x8
+#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
+
+#ifndef vp8_variance_sad8x16x8
+#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
+
+#ifndef vp8_variance_sad4x4x8
+#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
+
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#ifndef vp8_variance_sad16x16x4d
@@ -218,6 +254,21 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
#endif
extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
+#ifndef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_h);
+
+#ifndef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_v);
+
+#ifndef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
+
#ifndef vp8_variance_subpixmse16x16
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
#endif
@@ -258,6 +309,7 @@ extern prototype_sad(vp8_variance_get4x4sse_cs);
typedef prototype_sad(*vp8_sad_fn_t);
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
+typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
typedef prototype_variance(*vp8_variance_fn_t);
typedef prototype_variance2(*vp8_variance2_fn_t);
@@ -282,6 +334,9 @@ typedef struct
vp8_subpixvariance_fn_t subpixvar8x16;
vp8_subpixvariance_fn_t subpixvar16x8;
vp8_subpixvariance_fn_t subpixvar16x16;
+ vp8_variance_fn_t halfpixvar16x16_h;
+ vp8_variance_fn_t halfpixvar16x16_v;
+ vp8_variance_fn_t halfpixvar16x16_hv;
vp8_subpixvariance_fn_t subpixmse16x16;
vp8_getmbss_fn_t getmbss;
@@ -298,6 +353,12 @@ typedef struct
vp8_sad_multi_fn_t sad8x8x3;
vp8_sad_multi_fn_t sad4x4x3;
+ vp8_sad_multi1_fn_t sad16x16x8;
+ vp8_sad_multi1_fn_t sad16x8x8;
+ vp8_sad_multi1_fn_t sad8x16x8;
+ vp8_sad_multi1_fn_t sad8x8x8;
+ vp8_sad_multi1_fn_t sad4x4x8;
+
vp8_sad_multi_d_fn_t sad16x16x4d;
vp8_sad_multi_d_fn_t sad16x8x4d;
vp8_sad_multi_d_fn_t sad8x16x4d;
@@ -308,11 +369,15 @@ typedef struct
typedef struct
{
- vp8_sad_fn_t sdf;
- vp8_sad_multi_fn_t sdx3f;
- vp8_sad_multi_d_fn_t sdx4df;
- vp8_variance_fn_t vf;
+ vp8_sad_fn_t sdf;
+ vp8_variance_fn_t vf;
vp8_subpixvariance_fn_t svf;
+ vp8_variance_fn_t svf_halfpix_h;
+ vp8_variance_fn_t svf_halfpix_v;
+ vp8_variance_fn_t svf_halfpix_hv;
+ vp8_sad_multi_fn_t sdx3f;
+ vp8_sad_multi1_fn_t sdx8f;
+ vp8_sad_multi_d_fn_t sdx4df;
} vp8_variance_fn_ptr_t;
#if CONFIG_RUNTIME_CPU_DETECT
@@ -321,7 +386,4 @@ typedef struct
#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
#endif
-/* TODO: Determine if this USEBILINEAR flag is necessary. */
-#define USEBILINEAR
-
#endif
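Each cpi->fn_ptr entry can now be populated once per block size; a sketch of wiring the 16x16 slot to the C reference kernels named in this header (the actual table setup happens elsewhere in the encoder):

    vp8_variance_fn_ptr_t *fp = &cpi->fn_ptr[BLOCK_16X16];
    fp->sdf            = vp8_sad16x16_c;
    fp->vf             = vp8_variance16x16_c;
    fp->svf            = vp8_sub_pixel_variance16x16_c;
    fp->svf_halfpix_h  = vp8_variance_halfpixvar16x16_h_c;
    fp->svf_halfpix_v  = vp8_variance_halfpixvar16x16_v_c;
    fp->svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv_c;
    fp->sdx3f          = vp8_sad16x16x3_c;
    fp->sdx8f          = vp8_sad16x16x8_c;
    fp->sdx4df         = vp8_sad16x16x4d_c;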
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 85269b9d3..95ec96cec 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,7 +24,6 @@ const int vp8_six_tap[8][6] =
};
-#ifdef USEBILINEAR
const int VP8_FILTER_WEIGHT = 128;
const int VP8_FILTER_SHIFT = 7;
const int vp8_bilinear_taps[8][2] =
@@ -40,7 +40,7 @@ const int vp8_bilinear_taps[8][2] =
unsigned int vp8_get_mb_ss_c
(
- short *src_ptr
+ const short *src_ptr
)
{
unsigned int i = 0, sum = 0;
@@ -57,9 +57,9 @@ unsigned int vp8_get_mb_ss_c
void vp8_variance(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
int w,
int h,
@@ -89,9 +89,9 @@ void vp8_variance(
unsigned int
vp8_get8x8var_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
@@ -105,9 +105,9 @@ vp8_get8x8var_c
unsigned int
vp8_get16x16var_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
@@ -122,9 +122,9 @@ vp8_get16x16var_c
unsigned int vp8_variance16x16_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -138,9 +138,9 @@ unsigned int vp8_variance16x16_c(
}
unsigned int vp8_variance8x16_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -154,9 +154,9 @@ unsigned int vp8_variance8x16_c(
}
unsigned int vp8_variance16x8_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -171,9 +171,9 @@ unsigned int vp8_variance16x8_c(
unsigned int vp8_variance8x8_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -187,9 +187,9 @@ unsigned int vp8_variance8x8_c(
}
unsigned int vp8_variance4x4_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -204,9 +204,9 @@ unsigned int vp8_variance4x4_c(
unsigned int vp8_mse16x16_c(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -249,7 +249,7 @@ unsigned int vp8_mse16x16_c(
****************************************************************************/
void vp8e_filter_block2d_bil_first_pass
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
int pixel_step,
@@ -307,7 +307,7 @@ void vp8e_filter_block2d_bil_first_pass
****************************************************************************/
void vp8e_filter_block2d_bil_second_pass
(
- unsigned short *src_ptr,
+ const unsigned short *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
@@ -365,7 +365,7 @@ void vp8e_filter_block2d_bil_second_pass
****************************************************************************/
void vp8e_filter_block2d_bil
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
int *HFilter,
@@ -386,11 +386,11 @@ void vp8e_filter_block2d_bil
unsigned int vp8_sub_pixel_variance4x4_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -414,11 +414,11 @@ unsigned int vp8_sub_pixel_variance4x4_c
unsigned int vp8_sub_pixel_variance8x8_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -438,11 +438,11 @@ unsigned int vp8_sub_pixel_variance8x8_c
unsigned int vp8_sub_pixel_variance16x16_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -460,13 +460,50 @@ unsigned int vp8_sub_pixel_variance16x16_c
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
+
+unsigned int vp8_variance_halfpixvar16x16_h_c(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_c(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_c(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+
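
These three wrappers pin xoffset/yoffset to 4, the symmetric entry of vp8_bilinear_taps, so each is the exact half-pixel case of the general sub-pixel routine and motion search can call them without carrying offsets around. A sketch of why tap index 4 means half pel (the taps are Q7 weights summing to 128):

    /* Filtering with the (64, 64) tap pair is a rounded average of two
     * adjacent pixels, i.e. the half-pel sample. Illustrative helper. */
    static unsigned char half_pel(unsigned char a, unsigned char b)
    {
        return (unsigned char)((64 * a + 64 * b + 64) >> 7);  /* (a + b + 1) >> 1 */
    }
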
unsigned int vp8_sub_pixel_mse16x16_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -477,11 +514,11 @@ unsigned int vp8_sub_pixel_mse16x16_c
unsigned int vp8_sub_pixel_variance16x8_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -501,11 +538,11 @@ unsigned int vp8_sub_pixel_variance16x8_c
unsigned int vp8_sub_pixel_variance8x16_c
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -524,4 +561,3 @@ unsigned int vp8_sub_pixel_variance8x16_c
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
-#endif
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
deleted file mode 100644
index 186ee6856..000000000
--- a/vp8/encoder/x86/csystemdependent.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-
-#include "variance.h"
-#include "onyx_int.h"
-
-SADFunction *vp8_sad16x16;
-SADFunction *vp8_sad16x8;
-SADFunction *vp8_sad8x16;
-SADFunction *vp8_sad8x8;
-SADFunction *vp8_sad4x4;
-
-variance_function *vp8_variance4x4;
-variance_function *vp8_variance8x8;
-variance_function *vp8_variance8x16;
-variance_function *vp8_variance16x8;
-variance_function *vp8_variance16x16;
-
-
-variance_function *vp8_mse16x16;
-
-sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
-sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
-sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
-sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
-sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
-
-int (*vp8_block_error)(short *, short *);
-int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
-void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
-
-extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
-extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride);
-
-extern int vp8_block_error_c(short *, short *);
-extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
-
-extern int vp8_block_error_mmx(short *, short *);
-extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
-
-extern int vp8_block_error_xmm(short *, short *);
-extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
-
-
-
-int (*vp8_mbuverror)(MACROBLOCK *mb);
-unsigned int (*vp8_get_mb_ss)(short *);
-void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
-void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
-void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
-
-void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
-void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
-unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
-
-// c imports
-extern int vp8_mbuverror_c(MACROBLOCK *mb);
-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
-extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
-extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
-extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);
-
-
-extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
-extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
-
-extern SADFunction vp8_sad16x16_c;
-extern SADFunction vp8_sad16x8_c;
-extern SADFunction vp8_sad8x16_c;
-extern SADFunction vp8_sad8x8_c;
-extern SADFunction vp8_sad4x4_c;
-
-extern SADFunction vp8_sad16x16_wmt;
-extern SADFunction vp8_sad16x8_wmt;
-extern SADFunction vp8_sad8x16_wmt;
-extern SADFunction vp8_sad8x8_wmt;
-extern SADFunction vp8_sad4x4_wmt;
-
-extern SADFunction vp8_sad16x16_mmx;
-extern SADFunction vp8_sad16x8_mmx;
-extern SADFunction vp8_sad8x16_mmx;
-extern SADFunction vp8_sad8x8_mmx;
-extern SADFunction vp8_sad4x4_mmx;
-
-extern variance_function vp8_variance16x16_c;
-extern variance_function vp8_variance8x16_c;
-extern variance_function vp8_variance16x8_c;
-extern variance_function vp8_variance8x8_c;
-extern variance_function vp8_variance4x4_c;
-extern variance_function vp8_mse16x16_c;
-
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;
-
-extern unsigned int vp8_get_mb_ss_c(short *);
-extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
-
-// mmx imports
-extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
-extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);
-extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
-extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
-extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
-extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);
-extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
-extern variance_function vp8_variance4x4_mmx;
-extern variance_function vp8_variance8x8_mmx;
-extern variance_function vp8_variance8x16_mmx;
-extern variance_function vp8_variance16x8_mmx;
-extern variance_function vp8_variance16x16_mmx;
-
-extern variance_function vp8_mse16x16_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
-
-extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get_mb_ss_mmx(short *);
-extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
-
-
-// wmt imports
-extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
-extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
-extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);
-extern variance_function vp8_variance4x4_wmt;
-extern variance_function vp8_variance8x8_wmt;
-extern variance_function vp8_variance8x16_wmt;
-extern variance_function vp8_variance16x8_wmt;
-extern variance_function vp8_variance16x16_wmt;
-
-extern variance_function vp8_mse16x16_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
-extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);
-extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
-
-extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
-
-void vp8_cmachine_specific_config(void)
-{
- int mmx_enabled;
- int xmm_enabled;
- int wmt_enabled;
-
- vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
-
- if (wmt_enabled) // Willamette
- {
- // Willamette instruction set available:
- vp8_mbuverror = vp8_mbuverror_xmm;
- vp8_fast_quantize_b = vp8_fast_quantize_b_sse;
- vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
- vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
- vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx;
- vp8_fast_fdct8x4 = vp8_fast_fdct8x4_wmt;
- vp8_subtract_b = vp8_subtract_b_mmx;
- vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
- vp8_variance4x4 = vp8_variance4x4_mmx;
- vp8_variance8x8 = vp8_variance8x8_mmx;
- vp8_variance8x16 = vp8_variance8x16_wmt;
- vp8_variance16x8 = vp8_variance16x8_wmt;
- vp8_variance16x16 = vp8_variance16x16_wmt;
- vp8_mse16x16 = vp8_mse16x16_wmt;
- vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt;
- vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt;
- vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt;
- vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt;
- vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
- vp8_get_mb_ss = vp8_get_mb_ss_sse2;
- vp8_get16x16pred_error = vp8_get16x16pred_error_sse2;
- vp8_get8x8var = vp8_get8x8var_sse2;
- vp8_get16x16var = vp8_get16x16var_sse2;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
- vp8_sad16x16 = vp8_sad16x16_wmt;
- vp8_sad16x8 = vp8_sad16x8_wmt;
- vp8_sad8x16 = vp8_sad8x16_wmt;
- vp8_sad8x8 = vp8_sad8x8_wmt;
- vp8_sad4x4 = vp8_sad4x4_wmt;
- vp8_block_error = vp8_block_error_xmm;
- vp8_mbblock_error = vp8_mbblock_error_xmm;
- vp8_subtract_mby = vp8_subtract_mby_mmx;
-
- }
- else if (mmx_enabled)
- {
- // MMX instruction set available:
- vp8_mbuverror = vp8_mbuverror_mmx;
- vp8_fast_quantize_b = vp8_fast_quantize_b_mmx;
- vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
- vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
- vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx;
- vp8_fast_fdct8x4 = vp8_fast_fdct8x4_mmx;
- vp8_subtract_b = vp8_subtract_b_mmx;
- vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
- vp8_variance4x4 = vp8_variance4x4_mmx;
- vp8_variance8x8 = vp8_variance8x8_mmx;
- vp8_variance8x16 = vp8_variance8x16_mmx;
- vp8_variance16x8 = vp8_variance16x8_mmx;
- vp8_variance16x16 = vp8_variance16x16_mmx;
- vp8_mse16x16 = vp8_mse16x16_mmx;
- vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx;
- vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx;
- vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx;
- vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx;
- vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
- vp8_get_mb_ss = vp8_get_mb_ss_mmx;
- vp8_get16x16pred_error = vp8_get16x16pred_error_mmx;
- vp8_get8x8var = vp8_get8x8var_mmx;
- vp8_get16x16var = vp8_get16x16var_mmx;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
- vp8_sad16x16 = vp8_sad16x16_mmx;
- vp8_sad16x8 = vp8_sad16x8_mmx;
- vp8_sad8x16 = vp8_sad8x16_mmx;
- vp8_sad8x8 = vp8_sad8x8_mmx;
- vp8_sad4x4 = vp8_sad4x4_mmx;
- vp8_block_error = vp8_block_error_mmx;
- vp8_mbblock_error = vp8_mbblock_error_mmx;
- vp8_subtract_mby = vp8_subtract_mby_mmx;
-
- }
- else
- {
- // Pure C:
- vp8_mbuverror = vp8_mbuverror_c;
- vp8_fast_quantize_b = vp8_fast_quantize_b_c;
- vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
- vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
- vp8_fast_fdct4x4 = vp8_fast_fdct4x4_c;
- vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c;
- vp8_subtract_b = vp8_subtract_b_c;
- vp8_subtract_mbuv = vp8_subtract_mbuv_c;
- vp8_variance4x4 = vp8_variance4x4_c;
- vp8_variance8x8 = vp8_variance8x8_c;
- vp8_variance8x16 = vp8_variance8x16_c;
- vp8_variance16x8 = vp8_variance16x8_c;
- vp8_variance16x16 = vp8_variance16x16_c;
- vp8_mse16x16 = vp8_mse16x16_c;
- vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c;
- vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c;
- vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c;
- vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c;
- vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
- vp8_get_mb_ss = vp8_get_mb_ss_c;
- vp8_get16x16pred_error = vp8_get16x16pred_error_c;
- vp8_get8x8var = vp8_get8x8var_c;
- vp8_get16x16var = vp8_get16x16var_c;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
- vp8_sad16x16 = vp8_sad16x16_c;
- vp8_sad16x8 = vp8_sad16x8_c;
- vp8_sad8x16 = vp8_sad8x16_c;
- vp8_sad8x8 = vp8_sad8x8_c;
- vp8_sad4x4 = vp8_sad4x4_c;
- vp8_block_error = vp8_block_error_c;
- vp8_mbblock_error = vp8_mbblock_error_c;
- vp8_subtract_mby = vp8_subtract_mby_c;
- }
-
-}
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index e13423796..5acaca875 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -12,8 +13,7 @@
section .text
global sym(vp8_short_fdct4x4_mmx)
- global sym(vp8_fast_fdct4x4_mmx)
- global sym(vp8_fast_fdct8x4_wmt)
+ global sym(vp8_short_fdct8x4_wmt)
%define DCTCONSTANTSBITS (16)
@@ -23,10 +23,6 @@ section .text
%define x_c3 (25080) ; cos(pi*3/8) * (1<<15)
-%define _1STSTAGESHIFT 14
-%define _2NDSTAGESHIFT 16
-
-; using matrix multiply with source and destbuffer has a pitch
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
sym(vp8_short_fdct4x4_mmx):
push rbp
@@ -36,337 +32,10 @@ sym(vp8_short_fdct4x4_mmx):
push rsi
push rdi
; end prolog
-
- mov rsi, arg(0) ;input
- mov rdi, arg(1) ;output
-
- movsxd rax, dword ptr arg(2) ;pitch
- lea rdx, [dct_matrix GLOBAL]
-
- movq mm0, [rsi ]
- movq mm1, [rsi + rax]
-
- movq mm2, [rsi + rax*2]
- lea rsi, [rsi + rax*2]
-
- movq mm3, [rsi + rax]
-
- ; first column
- movq mm4, mm0
- movq mm7, [rdx]
-
- pmaddwd mm4, mm7
- movq mm5, mm1
-
- pmaddwd mm5, mm7
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
-
- pmaddwd mm5, mm7
- movq mm6, mm3
-
- pmaddwd mm6, mm7
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct1st_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _1STSTAGESHIFT
- psrad mm5, _1STSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi], mm4
-
- ;second column
- movq mm4, mm0
-
- pmaddwd mm4, [rdx+8]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx+8]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx+8]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx+8]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct1st_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _1STSTAGESHIFT
- psrad mm5, _1STSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi+8], mm4
-
-
- ;third column
- movq mm4, mm0
-
- pmaddwd mm4, [rdx+16]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx+16]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx+16]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx+16]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct1st_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _1STSTAGESHIFT
- psrad mm5, _1STSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi+16], mm4
-
- ;fourth column (this is the last column, so we do not have save the source any more)
-
- pmaddwd mm0, [rdx+24]
-
- pmaddwd mm1, [rdx+24]
- movq mm6, mm0
-
- punpckldq mm0, mm1
- punpckhdq mm6, mm1
-
- paddd mm0, mm6
-
- pmaddwd mm2, [rdx+24]
-
- pmaddwd mm3, [rdx+24]
- movq mm7, mm2
-
- punpckldq mm2, mm3
- punpckhdq mm7, mm3
-
- paddd mm2, mm7
- movq mm6, [dct1st_stage_rounding_mmx GLOBAL]
-
- paddd mm0, mm6
- paddd mm2, mm6
-
- psrad mm0, _1STSTAGESHIFT
- psrad mm2, _1STSTAGESHIFT
-
- packssdw mm0, mm2
-
- movq mm3, mm0
-
- ; done with one pass
- ; now start second pass
- movq mm0, [rdi ]
- movq mm1, [rdi+ 8]
- movq mm2, [rdi+ 16]
-
- movq mm4, mm0
-
- pmaddwd mm4, [rdx]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct2nd_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _2NDSTAGESHIFT
- psrad mm5, _2NDSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi], mm4
-
- ;second column
- movq mm4, mm0
-
- pmaddwd mm4, [rdx+8]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx+8]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx+8]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx+8]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct2nd_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _2NDSTAGESHIFT
- psrad mm5, _2NDSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi+8], mm4
-
-
- ;third column
- movq mm4, mm0
-
- pmaddwd mm4, [rdx+16]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx+16]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx+16]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx+16]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct2nd_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _2NDSTAGESHIFT
- psrad mm5, _2NDSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi+16], mm4
-
- ;fourth column
- movq mm4, mm0
-
- pmaddwd mm4, [rdx+24]
- movq mm5, mm1
-
- pmaddwd mm5, [rdx+24]
- movq mm6, mm4
-
- punpckldq mm4, mm5
- punpckhdq mm6, mm5
-
- paddd mm4, mm6
- movq mm5, mm2
-
- pmaddwd mm5, [rdx+24]
- movq mm6, mm3
-
- pmaddwd mm6, [rdx+24]
- movq mm7, mm5
-
- punpckldq mm5, mm6
- punpckhdq mm7, mm6
-
- paddd mm5, mm7
- movq mm6, [dct2nd_stage_rounding_mmx GLOBAL]
-
- paddd mm4, mm6
- paddd mm5, mm6
-
- psrad mm4, _2NDSTAGESHIFT
- psrad mm5, _2NDSTAGESHIFT
-
- packssdw mm4, mm5
- movq [rdi+24], mm4
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch)
-sym(vp8_fast_fdct4x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 3
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
mov rsi, arg(0) ;input
mov rdi, arg(1) ;output
- lea rdx, [dct_const_mmx GLOBAL]
+ lea rdx, [GLOBAL(dct_const_mmx)]
movsxd rax, dword ptr arg(2) ;pitch
lea rcx, [rsi + rax*2]
@@ -378,11 +47,11 @@ sym(vp8_fast_fdct4x4_mmx):
movq mm3, [rcx + rax]
; get the constants
    ;shift left by 3 for precision
- paddw mm0, mm0
- paddw mm1, mm1
+ psllw mm0, 3
+ psllw mm1, 3
- psllw mm2, 1
- psllw mm3, 1
+ psllw mm2, 3
+ psllw mm3, 3
; transpose for the second stage
movq mm4, mm0 ; 00 01 02 03
@@ -530,20 +199,23 @@ sym(vp8_fast_fdct4x4_mmx):
movq mm3, mm5
; done with vertical
- pcmpeqw mm4, mm4
- pcmpeqw mm5, mm5
- psrlw mm4, 15
- psrlw mm5, 15
+ pcmpeqw mm4, mm4
+ pcmpeqw mm5, mm5
+ psrlw mm4, 15
+ psrlw mm5, 15
+
+ psllw mm4, 2
+ psllw mm5, 2
paddw mm0, mm4
paddw mm1, mm5
paddw mm2, mm4
paddw mm3, mm5
- psraw mm0, 1
- psraw mm1, 1
- psraw mm2, 1
- psraw mm3, 1
+ psraw mm0, 3
+ psraw mm1, 3
+ psraw mm2, 3
+ psraw mm3, 3
movq [rdi ], mm0
movq [rdi+ 8], mm1
@@ -559,8 +231,8 @@ sym(vp8_fast_fdct4x4_mmx):
ret
-;void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch)
-sym(vp8_fast_fdct8x4_wmt):
+;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
+sym(vp8_short_fdct8x4_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
@@ -571,7 +243,7 @@ sym(vp8_fast_fdct8x4_wmt):
mov rsi, arg(0) ;input
mov rdi, arg(1) ;output
- lea rdx, [dct_const_xmm GLOBAL]
+ lea rdx, [GLOBAL(dct_const_xmm)]
movsxd rax, dword ptr arg(2) ;pitch
lea rcx, [rsi + rax*2]
@@ -583,11 +255,11 @@ sym(vp8_fast_fdct8x4_wmt):
movdqa xmm3, [rcx + rax]
; get the constants
    ;shift left by 3 for precision
- psllw xmm0, 1
- psllw xmm2, 1
+ psllw xmm0, 3
+ psllw xmm2, 3
- psllw xmm4, 1
- psllw xmm3, 1
+ psllw xmm4, 3
+ psllw xmm3, 3
; transpose for the second stage
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
@@ -757,20 +429,23 @@ sym(vp8_fast_fdct8x4_wmt):
; done with vertical
- pcmpeqw xmm4, xmm4
- pcmpeqw xmm5, xmm5;
- psrlw xmm4, 15
- psrlw xmm5, 15
+ pcmpeqw xmm4, xmm4
+ pcmpeqw xmm5, xmm5;
+ psrlw xmm4, 15
+ psrlw xmm5, 15
+
+ psllw xmm4, 2
+ psllw xmm5, 2
paddw xmm0, xmm4
paddw xmm1, xmm5
paddw xmm2, xmm4
paddw xmm3, xmm5
- psraw xmm0, 1
- psraw xmm1, 1
- psraw xmm2, 1
- psraw xmm3, 1
+ psraw xmm0, 3
+ psraw xmm1, 3
+ psraw xmm2, 3
+ psraw xmm3, 3
movq QWORD PTR[rdi ], xmm0
movq QWORD PTR[rdi+ 8], xmm1
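
Net effect of the fdct hunks above: the inputs are now pre-scaled by 8 (psllw by 3 rather than 1), and the epilogue builds a rounding constant of 4 (the pcmpeqw/psrlw 15/psllw 2 sequence) before the arithmetic shift by 3. Per 16-bit value, the final stage in scalar terms (a sketch of the arithmetic only):

    /* Rounding applied by the rewritten epilogue. */
    static short final_round(short v)
    {
        return (short)((v + 4) >> 3);   /* previously (v + 1) >> 1 */
    }
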
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 3e5e9a70c..723a78d76 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -1,260 +1,189 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
-global sym(vp8_short_fdct4x4_wmt)
-
-%define DCTCONSTANTSBITS (16)
-%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1))
-%define x_c1 (60547) ; cos(pi /8) * (1<<15)
-%define x_c2 (46341) ; cos(pi*2/8) * (1<<15)
-%define x_c3 (25080) ; cos(pi*3/8) * (1<<15)
-
-%define _1STSTAGESHIFT 14
-%define _2NDSTAGESHIFT 16
-
-
-;; using matrix multiply
-;void vp8_short_fdct4x4_wmt(short *input, short *output)
-sym(vp8_short_fdct4x4_wmt):
+;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
+global sym(vp8_short_fdct4x4_sse2)
+sym(vp8_short_fdct4x4_sse2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 2
+ SHADOW_ARGS_TO_STACK 3
+;; SAVE_XMM
GET_GOT rbx
+ push rsi
+ push rdi
; end prolog
- mov rax, arg(0) ;input
- mov rcx, arg(1) ;output
-
- lea rdx, [dct_matrix_sse2 GLOBAL]
-
- movdqu xmm0, [rax ]
- movdqu xmm1, [rax+16]
-
- ; first column
- movdqa xmm2, xmm0
- movdqa xmm7, [rdx]
-
- pmaddwd xmm2, xmm7
- movdqa xmm3, xmm1
-
- pmaddwd xmm3, xmm7
- movdqa xmm4, xmm2
-
- punpckldq xmm2, xmm3
- punpckhdq xmm4, xmm3
-
- movdqa xmm3, xmm2
- punpckldq xmm2, xmm4
-
- punpckhdq xmm3, xmm4
- paddd xmm2, xmm3
-
-
- paddd xmm2, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
- psrad xmm2, _1STSTAGESHIFT
- ;second column
- movdqa xmm3, xmm0
- pmaddwd xmm3, [rdx+16]
-
- movdqa xmm4, xmm1
- pmaddwd xmm4, [rdx+16]
-
- movdqa xmm5, xmm3
- punpckldq xmm3, xmm4
-
- punpckhdq xmm5, xmm4
- movdqa xmm4, xmm3
-
- punpckldq xmm3, xmm5
- punpckhdq xmm4, xmm5
-
- paddd xmm3, xmm4
- paddd xmm3, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
- psrad xmm3, _1STSTAGESHIFT
- packssdw xmm2, xmm3
-
- ;third column
- movdqa xmm3, xmm0
- pmaddwd xmm3, [rdx+32]
-
- movdqa xmm4, xmm1
- pmaddwd xmm4, [rdx+32]
-
- movdqa xmm5, xmm3
- punpckldq xmm3, xmm4
-
- punpckhdq xmm5, xmm4
- movdqa xmm4, xmm3
-
- punpckldq xmm3, xmm5
- punpckhdq xmm4, xmm5
-
- paddd xmm3, xmm4
- paddd xmm3, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
- psrad xmm3, _1STSTAGESHIFT
-
- ;fourth column (this is the last column, so we do not have save the source any more)
- pmaddwd xmm0, [rdx+48]
- pmaddwd xmm1, [rdx+48]
-
- movdqa xmm4, xmm0
- punpckldq xmm0, xmm1
-
- punpckhdq xmm4, xmm1
- movdqa xmm1, xmm0
-
- punpckldq xmm0, xmm4
- punpckhdq xmm1, xmm4
-
- paddd xmm0, xmm1
- paddd xmm0, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
- psrad xmm0, _1STSTAGESHIFT
- packssdw xmm3, xmm0
- ; done with one pass
- ; now start second pass
- movdqa xmm0, xmm2
- movdqa xmm1, xmm3
-
- pmaddwd xmm2, xmm7
- pmaddwd xmm3, xmm7
-
- movdqa xmm4, xmm2
- punpckldq xmm2, xmm3
+ mov rsi, arg(0)
+ movsxd rax, DWORD PTR arg(2)
+ lea rdi, [rsi + rax*2]
+
+ movq xmm0, MMWORD PTR[rsi ] ;03 02 01 00
+ movq xmm2, MMWORD PTR[rsi + rax] ;13 12 11 10
+ movq xmm1, MMWORD PTR[rsi + rax*2] ;23 22 21 20
+ movq xmm3, MMWORD PTR[rdi + rax] ;33 32 31 30
+
+ punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
+ punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
+
+ mov rdi, arg(1)
+
+ movdqa xmm2, xmm0
+ punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
+ punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
+ movdqa xmm1, xmm0
+ punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00
+ pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx
+ pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx
+
+ punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03
+ movdqa xmm3, xmm0
+ paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1
+ psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
+ psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
+ psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
+ movdqa xmm1, xmm0
+ pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
+ movdqa xmm4, xmm3
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352
+
+ paddd xmm3, XMMWORD PTR[GLOBAL(_14500)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_7500)]
+ psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12
+ psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12
+
+ packssdw xmm0, xmm1 ;op[2] op[0]
+ packssdw xmm3, xmm4 ;op[3] op[1]
+ ; 23 22 21 20 03 02 01 00
+ ;
+ ; 33 32 31 30 13 12 11 10
+ ;
+ movdqa xmm2, xmm0
+ punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00
+ punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30
+
+ movdqa xmm3, xmm0
+ punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00
+ punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01
+ movdqa xmm2, xmm0
+ punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00
+ punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20
+
+ movdqa xmm5, XMMWORD PTR[GLOBAL(_7)]
+ pshufd xmm2, xmm2, 04eh
+ movdqa xmm3, xmm0
+ paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1
+ psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1
+
+ pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1
+ movdqa xmm2, xmm3 ;save d1 for compare
+ pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1
+ pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1
+ pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1
+ pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1
+ pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1
+ movdqa xmm1, xmm0
+ pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
+ pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
+
+ pxor xmm4, xmm4 ;zero out for compare
+ paddd xmm0, xmm5
+ paddd xmm1, xmm5
+ pcmpeqw xmm2, xmm4
+ psrad xmm0, 4 ;(a1 + b1 + 7)>>4
+ psrad xmm1, 4 ;(a1 - b1 + 7)>>4
+ pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper,
+ ;and keep bit 0 of lower
+
+ movdqa xmm4, xmm3
+ pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352
+ pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352
+ paddd xmm3, XMMWORD PTR[GLOBAL(_12000)]
+ paddd xmm4, XMMWORD PTR[GLOBAL(_51000)]
+ packssdw xmm0, xmm1 ;op[8] op[0]
+ psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16
+ psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16
+
+ packssdw xmm3, xmm4 ;op[12] op[4]
+ movdqa xmm1, xmm0
+ paddw xmm3, xmm2 ;op[4] += (d1!=0)
+ punpcklqdq xmm0, xmm3 ;op[4] op[0]
+ punpckhqdq xmm1, xmm3 ;op[12] op[8]
+
+ movdqa XMMWORD PTR[rdi + 0], xmm0
+ movdqa XMMWORD PTR[rdi + 16], xmm1
- punpckhdq xmm4, xmm3
- movdqa xmm3, xmm2
-
- punpckldq xmm2, xmm4
- punpckhdq xmm3, xmm4
-
- paddd xmm2, xmm3
- paddd xmm2, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
- psrad xmm2, _2NDSTAGESHIFT
-
- ;second column
- movdqa xmm3, xmm0
- pmaddwd xmm3, [rdx+16]
-
- movdqa xmm4, xmm1
- pmaddwd xmm4, [rdx+16]
-
- movdqa xmm5, xmm3
- punpckldq xmm3, xmm4
-
- punpckhdq xmm5, xmm4
- movdqa xmm4, xmm3
-
- punpckldq xmm3, xmm5
- punpckhdq xmm4, xmm5
-
- paddd xmm3, xmm4
- paddd xmm3, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
- psrad xmm3, _2NDSTAGESHIFT
- packssdw xmm2, xmm3
-
- movdqu [rcx], xmm2
- ;third column
- movdqa xmm3, xmm0
- pmaddwd xmm3, [rdx+32]
-
- movdqa xmm4, xmm1
- pmaddwd xmm4, [rdx+32]
-
- movdqa xmm5, xmm3
- punpckldq xmm3, xmm4
-
- punpckhdq xmm5, xmm4
- movdqa xmm4, xmm3
-
- punpckldq xmm3, xmm5
- punpckhdq xmm4, xmm5
-
- paddd xmm3, xmm4
- paddd xmm3, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
- psrad xmm3, _2NDSTAGESHIFT
- ;fourth column
- pmaddwd xmm0, [rdx+48]
- pmaddwd xmm1, [rdx+48]
-
- movdqa xmm4, xmm0
- punpckldq xmm0, xmm1
-
- punpckhdq xmm4, xmm1
- movdqa xmm1, xmm0
-
- punpckldq xmm0, xmm4
- punpckhdq xmm1, xmm4
-
- paddd xmm0, xmm1
- paddd xmm0, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
- psrad xmm0, _2NDSTAGESHIFT
- packssdw xmm3, xmm0
-
- movdqu [rcx+16], xmm3
-
- mov rsp, rbp
; begin epilog
+ pop rdi
+ pop rsi
RESTORE_GOT
+;; RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
-
SECTION_RODATA
-;static unsigned int dct1st_stage_rounding_sse2[4] =
align 16
-dct1st_stage_rounding_sse2:
- times 4 dd 8192
-
-
-;static unsigned int dct2nd_stage_rounding_sse2[4] =
+_5352_2217:
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
+ dw 5352
+ dw 2217
align 16
-dct2nd_stage_rounding_sse2:
- times 4 dd 32768
-
-;static short dct_matrix_sse2[4][8]=
+_2217_neg5352:
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
+ dw 2217
+ dw -5352
align 16
-dct_matrix_sse2:
- times 8 dw 23170
-
- dw 30274
- dw 12540
- dw -12540
- dw -30274
- dw 30274
- dw 12540
- dw -12540
- dw -30274
-
- dw 23170
- times 2 dw -23170
- times 2 dw 23170
- times 2 dw -23170
- dw 23170
+_mult_add:
+ times 8 dw 1
+align 16
+_cmp_mask:
+ times 4 dw 1
+ times 4 dw 0
- dw 12540
- dw -30274
- dw 30274
- dw -12540
- dw 12540
- dw -30274
- dw 30274
- dw -12540
+align 16
+_mult_sub:
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+ dw 1
+ dw -1
+align 16
+_7:
+ times 4 dd 7
+align 16
+_14500:
+ times 4 dd 14500
+align 16
+_7500:
+ times 4 dd 7500
+align 16
+_12000:
+ times 4 dd 12000
+align 16
+_51000:
+ times 4 dd 51000
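
The data above pins down what vp8_short_fdct4x4_sse2 computes: 5352 and 2217 are the fixed-point rotation factors, 14500 and 7500 round the first pass ahead of its >>12, and 12000 and 51000 round the second pass ahead of its >>16, with the pcmpeqw/_cmp_mask step adding 1 to op[4] when d1 is nonzero. A scalar sketch of one second-pass column, mirroring the usual C reference for this transform (the helper itself is illustrative):

    /* Second-pass butterfly per column; a1..d1 are first-pass outputs. */
    static void fdct_second_pass(short *op, int a1, int b1, int c1, int d1)
    {
        op[0]  = (short)((a1 + b1 + 7) >> 4);
        op[8]  = (short)((a1 - b1 + 7) >> 4);
        op[4]  = (short)(((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0));
        op[12] = (short)((d1 * 2217 - c1 * 5352 + 51000) >> 16);
    }
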
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index bc80e64ef..05824c684 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -21,46 +22,41 @@
#if HAVE_MMX
extern prototype_fdct(vp8_short_fdct4x4_mmx);
extern prototype_fdct(vp8_short_fdct8x4_mmx);
-extern prototype_fdct(vp8_fast_fdct4x4_mmx);
-extern prototype_fdct(vp8_fast_fdct8x4_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
+#if 0
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
-
-#undef vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_mmx
-
-#undef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_mmx
+#endif
#endif
#endif
#if HAVE_SSE2
-extern prototype_fdct(vp8_short_fdct4x4_wmt);
extern prototype_fdct(vp8_short_fdct8x4_wmt);
-extern prototype_fdct(vp8_fast_fdct8x4_wmt);
-
extern prototype_fdct(vp8_short_walsh4x4_sse2);
-#if !CONFIG_RUNTIME_CPU_DETECT
+extern prototype_fdct(vp8_short_fdct4x4_sse2);
-#if 0
+#if !CONFIG_RUNTIME_CPU_DETECT
+#if 1
/* the rewritten SSE2 short DCT is now enabled and matches the C reference */
#undef vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_wmt
+#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
#undef vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_wmt
+#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
#endif
+#undef vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
+
#undef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_wmt
+#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index 9397a6cca..69b3edd66 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -54,7 +55,9 @@ extern prototype_submbuv(vp8_subtract_mbuv_mmx);
extern prototype_berr(vp8_block_error_xmm);
extern prototype_mberr(vp8_mbblock_error_xmm);
extern prototype_mbuverr(vp8_mbuverror_xmm);
-
+extern prototype_subb(vp8_subtract_b_sse2);
+extern prototype_submby(vp8_subtract_mby_sse2);
+extern prototype_submbuv(vp8_subtract_mbuv_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_encodemb_berr
@@ -66,6 +69,15 @@ extern prototype_mbuverr(vp8_mbuverror_xmm);
#undef vp8_encodemb_mbuverr
#define vp8_encodemb_mbuverr vp8_mbuverror_xmm
+#undef vp8_encodemb_subb
+#define vp8_encodemb_subb vp8_subtract_b_sse2
+
+#undef vp8_encodemb_submby
+#define vp8_encodemb_submby vp8_subtract_mby_sse2
+
+#undef vp8_encodemb_submbuv
+#define vp8_encodemb_submbuv vp8_subtract_mbuv_sse2
+
#endif
#endif
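
The new SSE2 subtract bindings vectorize the residual computation, which per element is simply source minus prediction. A scalar reference of the operation (the flat signature and name are illustrative; the real entry points take BLOCK/BLOCKD or macroblock buffers):

    /* Residual = source - prediction, row by row. */
    static void subtract_block(short *diff, const unsigned char *src,
                               int src_stride, const unsigned char *pred,
                               int pred_stride, int rows, int cols)
    {
        int r, c;
        for (r = 0; r < rows; r++) {
            for (c = 0; c < cols; c++)
                diff[c] = (short)(src[c] - pred[c]);
            diff += cols;
            src  += src_stride;
            pred += pred_stride;
        }
    }
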
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 194047155..c0f06bbbb 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -1,16 +1,16 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
-
;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
global sym(vp8_block_error_xmm)
sym(vp8_block_error_xmm):
@@ -19,11 +19,9 @@ sym(vp8_block_error_xmm):
SHADOW_ARGS_TO_STACK 2
push rsi
push rdi
- ; end prolog
-
+ ; end prologue
mov rsi, arg(0) ;coeff_ptr
- pxor xmm7, xmm7
mov rdi, arg(1) ;dcoef_ptr
movdqa xmm3, [rsi]
@@ -32,33 +30,27 @@ sym(vp8_block_error_xmm):
movdqa xmm5, [rsi+16]
movdqa xmm6, [rdi+16]
- pxor xmm1, xmm1 ; from movd xmm1, dc; dc=0
+ psubw xmm3, xmm4
- movdqa xmm2, xmm7
psubw xmm5, xmm6
-
- por xmm1, xmm2
+ pmaddwd xmm3, xmm3
pmaddwd xmm5, xmm5
- pcmpeqw xmm1, xmm7
- psubw xmm3, xmm4
+ paddd xmm3, xmm5
- pand xmm1, xmm3
- pmaddwd xmm1, xmm1
-
- paddd xmm1, xmm5
- movdqa xmm0, xmm1
+ pxor xmm7, xmm7
+ movdqa xmm0, xmm3
punpckldq xmm0, xmm7
- punpckhdq xmm1, xmm7
+ punpckhdq xmm3, xmm7
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
+ paddd xmm0, xmm3
+ movdqa xmm3, xmm0
psrldq xmm0, 8
- paddd xmm0, xmm1
+ paddd xmm0, xmm3
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
@@ -67,7 +59,6 @@ sym(vp8_block_error_xmm):
pop rbp
ret
-
;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
global sym(vp8_block_error_mmx)
sym(vp8_block_error_mmx):
@@ -124,7 +115,7 @@ sym(vp8_block_error_mmx):
psrlq mm1, 32
paddd mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -201,7 +192,7 @@ mberror_loop_mmx:
psrlq mm2, 32
paddd mm0, mm2
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -269,7 +260,7 @@ mberror_loop:
psrldq xmm0, 8
paddd xmm0, xmm1
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
@@ -326,7 +317,7 @@ mbuverror_loop_mmx:
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -383,7 +374,7 @@ mbuverror_loop:
psrldq xmm1, 8
paddd xmm1, xmm2
- movd rax, xmm1
+ movq rax, xmm1
pop rdi
pop rsi
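
Two changes run through these hunks: vp8_block_error_xmm drops the old dc-masking detour and accumulates squared coefficient differences directly, and each movd rax becomes movq rax so the full 64-bit return register is written. The scalar computation the simplified kernel performs (this matches the usual C reference for block error):

    /* Sum of squared differences over one 4x4 block's 16 coefficients. */
    static int block_error(const short *coeff, const short *dqcoeff)
    {
        int i, error = 0;
        for (i = 0; i < 16; i++) {
            int diff = coeff[i] - dqcoeff[i];
            error += diff * diff;
        }
        return error;
    }
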
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 7d8620178..39439f0d8 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -16,102 +17,148 @@ sym(vp8_short_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM
+ GET_GOT rbx
push rsi
push rdi
; end prolog
- mov rsi, arg(0)
- mov rdi, arg(1)
-
- movdqu xmm4, [rsi + 0] ;ip[4] ip[0]
- movdqu xmm0, [rsi + 16] ;ip[12] ip[8]
-
- pxor xmm7, xmm7
- ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ; 13 12 11 10 03 02 01 00
- ;
- ; 33 32 31 30 23 22 21 20
- ;
- movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
- punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
- punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
- movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
- punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
- punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
- ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
- movdqa xmm3, xmm4 ;ip[4] ip[0]
-
- paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
- psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
+ mov rsi, arg(0) ; input
+ mov rdi, arg(1) ; output
+ movsxd rdx, dword ptr arg(2) ; pitch
+
+ ; first for loop
+ movq xmm0, MMWORD PTR [rsi] ; load input
+ movq xmm1, MMWORD PTR [rsi + rdx]
+ lea rsi, [rsi + rdx*2]
+ movq xmm2, MMWORD PTR [rsi]
+ movq xmm3, MMWORD PTR [rsi + rdx]
+
+ punpcklwd xmm0, xmm1
+ punpcklwd xmm2, xmm3
+
+ movdqa xmm1, xmm0
+ punpckldq xmm0, xmm2 ; ip[1] ip[0]
+ punpckhdq xmm1, xmm2 ; ip[3] ip[2]
+
+ movdqa xmm2, xmm0
+ paddw xmm0, xmm1
+ psubw xmm2, xmm1
+
+ psllw xmm0, 2 ; d1 a1
+ psllw xmm2, 2 ; c1 b1
+
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2 ; b1 a1
+ punpckhqdq xmm1, xmm2 ; c1 d1
+
+ pxor xmm6, xmm6
+ movq xmm6, xmm0
+ pxor xmm7, xmm7
+ pcmpeqw xmm7, xmm6
+ paddw xmm7, [GLOBAL(c1)]
+
+ movdqa xmm2, xmm0
+ paddw xmm0, xmm1 ; b1+c1 a1+d1
+ psubw xmm2, xmm1 ; b1-c1 a1-d1
+ paddw xmm0, xmm7 ; b1+c1 a1+d1+(a1!=0)
+
+ ; second for loop
+ ; input: 13 9 5 1 12 8 4 0 (xmm0)
+ ; 14 10 6 2 15 11 7 3 (xmm2)
+ ; after shuffle:
+ ; 13 5 9 1 12 4 8 0 (xmm0)
+ ; 14 6 10 2 15 7 11 3 (xmm1)
+ pshuflw xmm3, xmm0, 0xd8
+ pshufhw xmm0, xmm3, 0xd8
+ pshuflw xmm3, xmm2, 0xd8
+ pshufhw xmm1, xmm3, 0xd8
+
+ movdqa xmm2, xmm0
+ pmaddwd xmm0, [GLOBAL(c1)] ; d11 a11 d10 a10
+ pmaddwd xmm2, [GLOBAL(cn1)] ; c11 b11 c10 b10
+ movdqa xmm3, xmm1
+ pmaddwd xmm1, [GLOBAL(c1)] ; d12 a12 d13 a13
+ pmaddwd xmm3, [GLOBAL(cn1)] ; c12 b12 c13 b13
+
+ pshufd xmm4, xmm0, 0xd8 ; d11 d10 a11 a10
+ pshufd xmm5, xmm2, 0xd8 ; c11 c10 b11 b10
+ pshufd xmm6, xmm1, 0x72 ; d13 d12 a13 a12
+ pshufd xmm7, xmm3, 0x72 ; c13 c12 b13 b12
+
+ movdqa xmm0, xmm4
+ punpcklqdq xmm0, xmm5 ; b11 b10 a11 a10
+ punpckhqdq xmm4, xmm5 ; c11 c10 d11 d10
+ movdqa xmm1, xmm6
+ punpcklqdq xmm1, xmm7 ; b13 b12 a13 a12
+ punpckhqdq xmm6, xmm7 ; c13 c12 d13 d12
+
+ movdqa xmm2, xmm0
+ paddd xmm0, xmm4 ; b21 b20 a21 a20
+ psubd xmm2, xmm4 ; c21 c20 d21 d20
+ movdqa xmm3, xmm1
+ paddd xmm1, xmm6 ; b23 b22 a23 a22
+ psubd xmm3, xmm6 ; c23 c22 d23 d22
+
+ pxor xmm4, xmm4
movdqa xmm5, xmm4
- punpcklqdq xmm4, xmm3 ;d1 a1
- punpckhqdq xmm5, xmm3 ;c1 b1
-
- movdqa xmm1, xmm5 ;c1 b1
- paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0]
- psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
- ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ; 13 12 11 10 03 02 01 00
- ;
- ; 33 32 31 30 23 22 21 20
- ;
- movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00
- punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00
- punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10
- movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00
- punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
- punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
- ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
- movdqa xmm3, xmm5 ;ip[4] ip[0]
-
- paddw xmm5, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
- psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
- movdqa xmm6, xmm5
- punpcklqdq xmm5, xmm3 ;d1 a1
- punpckhqdq xmm6, xmm3 ;c1 b1
-
- movdqa xmm1, xmm6 ;c1 b1
- paddw xmm6, xmm5 ;dl+cl a1+b1 aka op[4] op[0]
- psubw xmm5, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
-
- movdqa xmm0, xmm6 ;aka b2 a2
- movdqa xmm1, xmm5 ;aka d2 c2
-
- pcmpgtw xmm0, xmm7
- pcmpgtw xmm1, xmm7
-
- psrlw xmm0, 15
- psrlw xmm1, 15
-
- paddw xmm6, xmm0
- paddw xmm5, xmm1
-
- psraw xmm6, 1
- psraw xmm5, 1
-
- ; a2 = a1 + b1;
- ; b2 = c1 + d1;
- ; c2 = a1 - b1;
- ; d2 = d1 - c1;
- ; a2 += (a2>0);
- ; b2 += (b2>0);
- ; c2 += (c2>0);
- ; d2 += (d2>0);
- ; op[0] = (a2)>>1;
- ; op[4] = (b2)>>1;
- ; op[8] = (c2)>>1;
- ; op[12]= (d2)>>1;
-
- movdqu [rdi + 0], xmm6
- movdqu [rdi + 16], xmm5
+ pcmpgtd xmm4, xmm0
+ pcmpgtd xmm5, xmm2
+ pand xmm4, [GLOBAL(cd1)]
+ pand xmm5, [GLOBAL(cd1)]
+
+ pxor xmm6, xmm6
+ movdqa xmm7, xmm6
+ pcmpgtd xmm6, xmm1
+ pcmpgtd xmm7, xmm3
+ pand xmm6, [GLOBAL(cd1)]
+ pand xmm7, [GLOBAL(cd1)]
+
+ paddd xmm0, xmm4
+ paddd xmm2, xmm5
+ paddd xmm0, [GLOBAL(cd3)]
+ paddd xmm2, [GLOBAL(cd3)]
+ paddd xmm1, xmm6
+ paddd xmm3, xmm7
+ paddd xmm1, [GLOBAL(cd3)]
+ paddd xmm3, [GLOBAL(cd3)]
+
+ psrad xmm0, 3
+ psrad xmm1, 3
+ psrad xmm2, 3
+ psrad xmm3, 3
+ movdqa xmm4, xmm0
+ punpcklqdq xmm0, xmm1 ; a23 a22 a21 a20
+ punpckhqdq xmm4, xmm1 ; b23 b22 b21 b20
+ movdqa xmm5, xmm2
+ punpckhqdq xmm2, xmm3 ; c23 c22 c21 c20
+ punpcklqdq xmm5, xmm3 ; d23 d22 d21 d20
+
+ packssdw xmm0, xmm4 ; b23 b22 b21 b20 a23 a22 a21 a20
+ packssdw xmm2, xmm5 ; d23 d22 d21 d20 c23 c22 c21 c20
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi + 16], xmm2
; begin epilog
pop rdi
pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
+
+SECTION_RODATA
+align 16
+c1:
+ dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001
+align 16
+cn1:
+ dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff
+align 16
+cd1:
+ dd 0x00000001, 0x00000001, 0x00000001, 0x00000001
+align 16
+cd3:
+ dd 0x00000003, 0x00000003, 0x00000003, 0x00000003
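
The rewritten Walsh-Hadamard kernel pre-scales by 4, widens to 32 bits for the second pass (the pmaddwd against the c1/cn1 (+1,+1)/(+1,-1) pairs), and finishes with the cd1/cd3 rounding: add 1 to negative intermediates, then round with +3 before the shift by 3. Per output value, in scalar terms (a sketch of the arithmetic):

    /* Final rounding per 32-bit intermediate, as the pcmpgtd/cd1 and
     * cd3/psrad steps implement it. */
    static short walsh_round(int v)
    {
        v += (v < 0);             /* bias negatives by one */
        return (short)((v + 3) >> 3);
    }
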
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index 5661491ad..3b7b29c21 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -23,5 +24,14 @@
#endif
#endif
+#if HAVE_SSE4_1
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp8_search_full_search
+#define vp8_search_full_search vp8_full_search_sadx8
+
+#endif
+#endif
+
#endif
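
With SSE4.1 present, full search binds to vp8_full_search_sadx8, which consumes the sdx8f entry added to vp8_variance_fn_ptr_t: one call scores eight horizontally consecutive candidates. A sketch of the calling pattern (loop bounds and bookkeeping are illustrative; the unsigned short results follow the sad_multi_same_address_1 prototype):

    /* Score eight adjacent positions and track the best one. */
    static int best_of_eight(const vp8_variance_fn_ptr_t *fn_ptr,
                             const unsigned char *what, int what_stride,
                             const unsigned char *check_here, int in_what_stride,
                             unsigned int *bestsad)
    {
        unsigned short sad_array[8];
        int i, best_site = -1;

        fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array);

        for (i = 0; i < 8; i++) {
            if (sad_array[i] < *bestsad) {
                *bestsad = sad_array[i];
                best_site = i;
            }
        }
        return best_site;
    }
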
diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c
index 69617ca47..a182c8856 100644
--- a/vp8/encoder/x86/preproc_mmx.c
+++ b/vp8/encoder/x86/preproc_mmx.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 847fc6e37..f29a54ecd 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -248,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
paddd mm0, mm5
; eob adjustment begins here
- movd rcx, mm0
+ movq rcx, mm0
and rcx, 0xffff
xor rdx, rdx
@@ -261,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
and rax, rdx
; Substitute the sse assembly for the old mmx mixed assembly/C. The
; following is kept as reference
- ; movd rcx, mm0
+ ; movq rcx, mm0
; bsr rax, rcx
;
; mov eob, rax
@@ -283,156 +284,3 @@ sym(vp8_fast_quantize_b_impl_mmx):
UNSHADOW_ARGS
pop rbp
ret
-
-
-;int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
-; short *qcoeff_ptr,short *dequant_ptr,
-; short *scan_mask, short *round_ptr,
-; short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_sse)
-sym(vp8_fast_quantize_b_impl_sse):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;coeff_ptr
- movdqa xmm0, [rsi]
-
- mov rax, arg(1) ;zbin_ptr
- movdqa xmm1, [rax]
-
- movdqa xmm3, xmm0
- psraw xmm0, 15
-
- pxor xmm3, xmm0
- psubw xmm3, xmm0 ; abs
-
- movdqa xmm2, xmm3
- pcmpgtw xmm1, xmm2
-
- pandn xmm1, xmm2
- movdqa xmm3, xmm1
-
- mov rdx, arg(6) ; quant_ptr
- movdqa xmm1, [rdx]
-
- mov rcx, arg(5) ; round_ptr
- movdqa xmm2, [rcx]
-
- paddw xmm3, xmm2
- pmulhuw xmm3, xmm1
-
- pxor xmm3, xmm0
- psubw xmm3, xmm0 ;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
- movdqa xmm0, xmm3
-
- movdqa [rdi], xmm3
-
- mov rax, arg(3) ;dequant_ptr
- movdqa xmm2, [rax]
-
- pmullw xmm3, xmm2
- mov rax, arg(7) ;dqcoeff_ptr
-
- movdqa [rax], xmm3
-
- ; next 8
- movdqa xmm4, [rsi+16]
-
- mov rax, arg(1) ;zbin_ptr
- movdqa xmm5, [rax+16]
-
- movdqa xmm7, xmm4
- psraw xmm4, 15
-
- pxor xmm7, xmm4
- psubw xmm7, xmm4 ; abs
-
- movdqa xmm6, xmm7
- pcmpgtw xmm5, xmm6
-
- pandn xmm5, xmm6
- movdqa xmm7, xmm5
-
- movdqa xmm5, [rdx+16]
- movdqa xmm6, [rcx+16]
-
-
- paddw xmm7, xmm6
- pmulhuw xmm7, xmm5
-
- pxor xmm7, xmm4
- psubw xmm7, xmm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movdqa xmm1, xmm7
- movdqa [rdi+16], xmm7
-
- mov rax, arg(3) ;dequant_ptr
- movdqa xmm6, [rax+16]
-
- pmullw xmm7, xmm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movdqa [rax+16], xmm7
- mov rdi, arg(4) ;scan_mask
-
- pxor xmm7, xmm7
- movdqa xmm2, [rdi]
-
- movdqa xmm3, [rdi+16];
- pcmpeqw xmm0, xmm7
-
- pcmpeqw xmm1, xmm7
- pcmpeqw xmm6, xmm6
-
- pxor xmm0, xmm6
- pxor xmm1, xmm6
-
- psrlw xmm0, 15
- psrlw xmm1, 15
-
- pmaddwd xmm0, xmm2
- pmaddwd xmm1, xmm3
-
- movq xmm2, xmm0
- movq xmm3, xmm1
-
- psrldq xmm0, 8
- psrldq xmm1, 8
-
- paddd xmm0, xmm1
- paddd xmm2, xmm3
-
- paddd xmm0, xmm2
- movq xmm1, xmm0
-
- psrldq xmm0, 4
- paddd xmm1, xmm0
-
- movd rcx, xmm1
- and rcx, 0xffff
-
- xor rdx, rdx
- sub rdx, rcx
-
- bsr rax, rcx
- inc rax
-
- sar rdx, 31
- and rax, rdx
-
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
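
The deleted vp8_fast_quantize_b_impl_sse above (superseded by the SSE2 file added next) vectorized the standard fast-quantize recipe: take |z|, apply the zbin dead zone, add the rounding term, keep the high 16 bits of the quantizer product, restore the sign, then dequantize. Per coefficient, in scalar form (a sketch; the real entry points process 16-coefficient blocks):

    /* One coefficient of the fast quantizer the SSE code vectorized. */
    static void fast_quantize_coeff(short z, short zbin, short round,
                                    short quant, short dequant,
                                    short *qcoeff, short *dqcoeff)
    {
        int sz = z >> 15;                       /* 0 or -1: sign of z */
        int x  = (z ^ sz) - sz;                 /* abs(z) */
        int y  = 0;

        if (x >= zbin)                          /* dead zone (pcmpgtw/pandn) */
            y = ((x + round) * quant) >> 16;    /* Q16 quantize (pmulhuw) */

        y = (y ^ sz) - sz;                      /* restore sign */
        *qcoeff  = (short)y;
        *dqcoeff = (short)(y * dequant);
    }
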
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
new file mode 100644
index 000000000..1e0bd5c48
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -0,0 +1,388 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+; short *qcoeff_ptr,short *dequant_ptr,
+; const int *default_zig_zag, short *round_ptr,
+; short *quant_ptr, short *dqcoeff_ptr,
+; unsigned short zbin_oq_value,
+; short *zbin_boost_ptr);
+;
+global sym(vp8_regular_quantize_b_impl_sse2)
+sym(vp8_regular_quantize_b_impl_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 10
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+
+ %define abs_minus_zbin_lo 0
+ %define abs_minus_zbin_hi 16
+ %define temp_qcoeff_lo 32
+ %define temp_qcoeff_hi 48
+ %define save_xmm6 64
+ %define save_xmm7 80
+ %define eob 96
+
+ %define vp8_regularquantizeb_stack_size eob + 16
+
+ sub rsp, vp8_regularquantizeb_stack_size
+
+ movdqa OWORD PTR[rsp + save_xmm6], xmm6
+ movdqa OWORD PTR[rsp + save_xmm7], xmm7
+
+ mov rdx, arg(0) ;coeff_ptr
+ mov eax, arg(8) ;zbin_oq_value
+
+ mov rcx, arg(1) ;zbin_ptr
+ movd xmm7, eax
+
+ movdqa xmm0, OWORD PTR[rdx]
+ movdqa xmm4, OWORD PTR[rdx + 16]
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ psraw xmm0, 15 ;sign of z (aka sz)
+ psraw xmm4, 15 ;sign of z (aka sz)
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+
+ movdqa xmm2, OWORD PTR[rcx] ;load zbin_ptr
+ movdqa xmm3, OWORD PTR[rcx + 16] ;load zbin_ptr
+
+ pshuflw xmm7, xmm7, 0
+ psubw xmm1, xmm0 ;x = abs(z)
+
+ punpcklwd xmm7, xmm7 ;duplicated zbin_oq_value
+ psubw xmm5, xmm4 ;x = abs(z)
+
+ paddw xmm2, xmm7
+ paddw xmm3, xmm7
+
+ psubw xmm1, xmm2 ;sub (zbin_ptr + zbin_oq_value)
+ psubw xmm5, xmm3 ;sub (zbin_ptr + zbin_oq_value)
+
+ mov rdi, arg(5) ;round_ptr
+ mov rsi, arg(6) ;quant_ptr
+
+ movdqa OWORD PTR[rsp + abs_minus_zbin_lo], xmm1
+ movdqa OWORD PTR[rsp + abs_minus_zbin_hi], xmm5
+
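+    ; x - (zbin + zbin_oq) stays on the stack for the per-position
+    ; boost test in the zigzag loop; the paddw pair below recovers x
+    ; itself for the round-and-multiply quantization.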
+ paddw xmm1, xmm2 ;add (zbin_ptr + zbin_oq_value) back
+ paddw xmm5, xmm3 ;add (zbin_ptr + zbin_oq_value) back
+
+ movdqa xmm2, OWORD PTR[rdi]
+ movdqa xmm3, OWORD PTR[rsi]
+
+ movdqa xmm6, OWORD PTR[rdi + 16]
+ movdqa xmm7, OWORD PTR[rsi + 16]
+
+ paddw xmm1, xmm2
+ paddw xmm5, xmm6
+
+ pmulhw xmm1, xmm3
+ pmulhw xmm5, xmm7
+
+ mov rsi, arg(2) ;qcoeff_ptr
+ pxor xmm6, xmm6
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa OWORD PTR[rsp + temp_qcoeff_lo], xmm1
+ movdqa OWORD PTR[rsp + temp_qcoeff_hi], xmm5
+
+ movdqa OWORD PTR[rsi], xmm6 ;zero qcoeff
+ movdqa OWORD PTR[rsi + 16], xmm6 ;zero qcoeff
+
+ xor rax, rax
+ mov rcx, -1
+
+ mov [rsp + eob], rcx
+ mov rsi, arg(9) ;zbin_boost_ptr
+
+ mov rbx, arg(4) ;default_zig_zag
+
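+    ; The loop below walks the block in zigzag order, unrolled four
+    ; coefficients per pass. Each position compares the saved
+    ; abs(z) - (zbin + zbin_oq) against the running *zbin_boost_ptr
+    ; threshold; when a coefficient survives, its quantized value is
+    ; copied out, the boost pointer is rewound to the start of the
+    ; boost table and eob is updated to the current zigzag index.
+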
+rq_zigzag_loop:
+ movsxd rcx, DWORD PTR[rbx + rax*4] ;now we have rc
+ movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
+ lea rsi, [rsi + 2] ;zbin_boost_ptr++
+
+ movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+
+ sub edx, edi ;x - zbin
+ jl rq_zigzag_1
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+ cmp edx, 0
+ je rq_zigzag_1
+
+ mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+ mov rsi, arg(9) ;zbin_boost_ptr
+ mov [rsp + eob], rax ;eob = i
+
+rq_zigzag_1:
+ movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
+ movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
+ lea rsi, [rsi + 2] ;zbin_boost_ptr++
+
+ movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+ lea rax, [rax + 1]
+
+ sub edx, edi ;x - zbin
+ jl rq_zigzag_1a
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+ cmp edx, 0
+ je rq_zigzag_1a
+
+ mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+ mov rsi, arg(9) ;zbin_boost_ptr
+ mov [rsp + eob], rax ;eob = i
+
+rq_zigzag_1a:
+ movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
+ movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
+ lea rsi, [rsi + 2] ;zbin_boost_ptr++
+
+ movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+ lea rax, [rax + 1]
+
+ sub edx, edi ;x - zbin
+ jl rq_zigzag_1b
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+ cmp edx, 0
+ je rq_zigzag_1b
+
+ mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+ mov rsi, arg(9) ;zbin_boost_ptr
+ mov [rsp + eob], rax ;eob = i
+
+rq_zigzag_1b:
+ movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
+ movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
+ lea rsi, [rsi + 2] ;zbin_boost_ptr++
+
+ movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+ lea rax, [rax + 1]
+
+ sub edx, edi ;x - zbin
+ jl rq_zigzag_1c
+
+ mov rdi, arg(2) ;qcoeff_ptr
+
+ movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+ cmp edx, 0
+ je rq_zigzag_1c
+
+ mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+ mov rsi, arg(9) ;zbin_boost_ptr
+ mov [rsp + eob], rax ;eob = i
+
+rq_zigzag_1c:
+ lea rax, [rax + 1]
+
+ cmp rax, 16
+ jl rq_zigzag_loop
+
+ mov rdi, arg(2) ;qcoeff_ptr
+ mov rcx, arg(3) ;dequant_ptr
+ mov rsi, arg(7) ;dqcoeff_ptr
+
+ movdqa xmm2, OWORD PTR[rdi]
+ movdqa xmm3, OWORD PTR[rdi + 16]
+
+ movdqa xmm0, OWORD PTR[rcx]
+ movdqa xmm1, OWORD PTR[rcx + 16]
+
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ movdqa OWORD PTR[rsi], xmm0 ;store dqcoeff
+ movdqa OWORD PTR[rsi + 16], xmm1 ;store dqcoeff
+
+ mov rax, [rsp + eob]
+
+ movdqa xmm6, OWORD PTR[rsp + save_xmm6]
+ movdqa xmm7, OWORD PTR[rsp + save_xmm7]
+
+ add rax, 1
+
+ add rsp, vp8_regularquantizeb_stack_size
+ pop rsp
+
+ ; begin epilog
+ pop rbx
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
+; short *qcoeff_ptr,short *dequant_ptr,
+; short *scan_mask, short *round_ptr,
+; short *quant_ptr, short *dqcoeff_ptr);
+global sym(vp8_fast_quantize_b_impl_sse2)
+sym(vp8_fast_quantize_b_impl_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+
+ %define save_xmm6 0
+ %define save_xmm7 16
+
+ %define vp8_fastquantizeb_stack_size save_xmm7 + 16
+
+ sub rsp, vp8_fastquantizeb_stack_size
+
+ movdqa XMMWORD PTR[rsp + save_xmm6], xmm6
+ movdqa XMMWORD PTR[rsp + save_xmm7], xmm7
+
+ mov rdx, arg(0) ;coeff_ptr
+ mov rcx, arg(2) ;dequant_ptr
+ mov rax, arg(3) ;scan_mask
+ mov rdi, arg(4) ;round_ptr
+ mov rsi, arg(5) ;quant_ptr
+
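+    ; Fast path: no zigzag zbin test. y = ((abs(z) + round) * quant) >> 16
+    ; via pmulhw, with the sign restored from z and dqcoeff = y * dequant;
+    ; eob is recovered afterwards from a per-coefficient nonzero bit mask.
+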
+ movdqa xmm0, XMMWORD PTR[rdx]
+ movdqa xmm4, XMMWORD PTR[rdx + 16]
+
+ movdqa xmm6, XMMWORD PTR[rdi] ;round lo
+ movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ psraw xmm0, 15 ;sign of z (aka sz)
+ psraw xmm4, 15 ;sign of z (aka sz)
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0 ;x = abs(z)
+ psubw xmm5, xmm4 ;x = abs(z)
+
+ paddw xmm1, xmm6
+ paddw xmm5, xmm7
+
+ pmulhw xmm1, XMMWORD PTR[rsi]
+ pmulhw xmm5, XMMWORD PTR[rsi + 16]
+
+ mov rdi, arg(1) ;qcoeff_ptr
+ mov rsi, arg(6) ;dqcoeff_ptr
+
+ movdqa xmm6, XMMWORD PTR[rcx]
+ movdqa xmm7, XMMWORD PTR[rcx + 16]
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa XMMWORD PTR[rdi], xmm1
+ movdqa XMMWORD PTR[rdi + 16], xmm5
+
+ pmullw xmm6, xmm1
+ pmullw xmm7, xmm5
+
+ movdqa xmm2, XMMWORD PTR[rax]
+ movdqa xmm3, XMMWORD PTR[rax+16];
+
+ pxor xmm4, xmm4 ;clear all bits
+ pcmpeqw xmm1, xmm4
+ pcmpeqw xmm5, xmm4
+
+ pcmpeqw xmm4, xmm4 ;set all bits
+ pxor xmm1, xmm4
+ pxor xmm5, xmm4
+
+ psrlw xmm1, 15
+ psrlw xmm5, 15
+
+ pmaddwd xmm1, xmm2
+ pmaddwd xmm5, xmm3
+
+ movq xmm2, xmm1
+ movq xmm3, xmm5
+
+ psrldq xmm1, 8
+ psrldq xmm5, 8
+
+ paddd xmm1, xmm5
+ paddd xmm2, xmm3
+
+ paddd xmm1, xmm2
+ movq xmm5, xmm1
+
+ psrldq xmm1, 4
+ paddd xmm5, xmm1
+
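+    ; rcx receives the horizontal sum of the nonzero flags pmaddwd'ed
+    ; against scan_mask, which is expected to hold 1 << zigzag_index
+    ; per coefficient, i.e. a bit mask of the nonzero positions in
+    ; zigzag order. bsr picks the last set bit, inc turns that index
+    ; into a count, and the sar/and pair forces eob to 0 for an empty
+    ; mask, where bsr leaves rax undefined.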
+ movq rcx, xmm5
+ and rcx, 0xffff
+
+ xor rdx, rdx
+ sub rdx, rcx
+
+ bsr rax, rcx
+ inc rax
+
+ sar rdx, 31
+ and rax, rdx
+
+ movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff
+ movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff
+
+ movdqa xmm6, XMMWORD PTR[rsp + save_xmm6]
+ movdqa xmm7, XMMWORD PTR[rsp + save_xmm7]
+
+ add rsp, vp8_fastquantizeb_stack_size
+ pop rsp
+
+ ; begin epilog
+ pop rbx
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
new file mode 100755
index 000000000..2f33199e5
--- /dev/null
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -0,0 +1,114 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
+; short *qcoeff_ptr,short *dequant_ptr,
+; short *round_ptr,
+; short *quant_ptr, short *dqcoeff_ptr);
+;
+global sym(vp8_fast_quantize_b_impl_ssse3)
+sym(vp8_fast_quantize_b_impl_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdx, arg(0) ;coeff_ptr
+ mov rdi, arg(3) ;round_ptr
+ mov rsi, arg(4) ;quant_ptr
+
+ movdqa xmm0, [rdx]
+ movdqa xmm4, [rdx + 16]
+
+ movdqa xmm2, [rdi] ;round lo
+ movdqa xmm3, [rdi + 16] ;round hi
+
+ movdqa xmm1, xmm0
+ movdqa xmm5, xmm4
+
+ psraw xmm0, 15 ;sign of z (aka sz)
+ psraw xmm4, 15 ;sign of z (aka sz)
+
+ pabsw xmm1, xmm1
+ pabsw xmm5, xmm5
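+    ; SSSE3 pabsw replaces the pxor/psubw absolute-value idiom used
+    ; by the SSE2 version of this routine.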
+
+ paddw xmm1, xmm2
+ paddw xmm5, xmm3
+
+ pmulhw xmm1, [rsi]
+ pmulhw xmm5, [rsi + 16]
+
+ mov rdi, arg(1) ;qcoeff_ptr
+ mov rcx, arg(2) ;dequant_ptr
+ mov rsi, arg(5) ;dqcoeff_ptr
+
+ pxor xmm1, xmm0
+ pxor xmm5, xmm4
+ psubw xmm1, xmm0
+ psubw xmm5, xmm4
+
+ movdqa [rdi], xmm1
+ movdqa [rdi + 16], xmm5
+
+ movdqa xmm2, [rcx]
+ movdqa xmm3, [rcx + 16]
+
+ pxor xmm4, xmm4
+ pmullw xmm2, xmm1
+ pmullw xmm3, xmm5
+
+ pcmpeqw xmm1, xmm4 ;non zero mask
+ pcmpeqw xmm5, xmm4 ;non zero mask
+ packsswb xmm1, xmm5
+ pshufb xmm1, [ GLOBAL(zz_shuf)]
+
+ pmovmskb edx, xmm1
+
+; xor ecx, ecx
+; mov eax, -1
+;find_eob_loop:
+; shr edx, 1
+; jc fq_skip
+; mov eax, ecx
+;fq_skip:
+; inc ecx
+; cmp ecx, 16
+; jne find_eob_loop
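+
+    ; pcmpeqw flagged the zero coefficients, packsswb narrowed the
+    ; flags to bytes, and pshufb with zz_shuf reordered them into
+    ; zigzag order, so bit i of edx is set when the coefficient at
+    ; zigzag position i is zero. Flipping the low 16 bits marks the
+    ; nonzero positions instead, letting bsr replace the scalar
+    ; search kept above for reference.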
+ xor rdi, rdi
+ mov eax, -1
+ xor dx, ax ;flip the bits for bsr
+ bsr eax, edx
+
+ movdqa [rsi], xmm2 ;store dqcoeff
+ movdqa [rsi + 16], xmm3 ;store dqcoeff
+
+ sub edi, edx ;check for all zeros in bit mask
+ sar edi, 31 ;0 or -1
+ add eax, 1
+ and eax, edi ;if the bit mask was all zero,
+ ;then eob = 0
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+zz_shuf:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
new file mode 100644
index 000000000..b5b22c022
--- /dev/null
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+#ifndef QUANTIZE_X86_H
+#define QUANTIZE_X86_H
+
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code
+ */
+#if HAVE_MMX
+
+#endif
+
+
+#if HAVE_SSE2
+extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+/* The sse2 quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ *#undef vp8_quantize_quantb
+ *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
+ */
+
+#endif
+
+#endif
+
+
+#endif
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index a825698e7..85cb023a4 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -16,8 +17,6 @@ global sym(vp8_sad8x8_mmx)
global sym(vp8_sad4x4_mmx)
global sym(vp8_sad16x8_mmx)
-%idefine QWORD
-
;unsigned int vp8_sad16x16_mmx(
; unsigned char *src_ptr,
; int src_stride,
@@ -99,7 +98,7 @@ x16x16sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -171,7 +170,7 @@ x8x16sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -241,7 +240,7 @@ x8x8sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -271,11 +270,11 @@ sym(vp8_sad4x4_mmx):
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
- movd mm0, QWORD PTR [rsi]
- movd mm1, QWORD PTR [rdi]
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rdi]
- movd mm2, QWORD PTR [rsi+rax]
- movd mm3, QWORD PTR [rdi+rdx]
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
@@ -297,11 +296,11 @@ sym(vp8_sad4x4_mmx):
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
- movd mm4, QWORD PTR [rsi]
- movd mm5, QWORD PTR [rdi]
+ movd mm4, DWORD PTR [rsi]
+ movd mm5, DWORD PTR [rdi]
- movd mm6, QWORD PTR [rsi+rax]
- movd mm7, QWORD PTR [rdi+rdx]
+ movd mm6, DWORD PTR [rsi+rax]
+ movd mm7, DWORD PTR [rdi+rdx]
punpcklbw mm4, mm6
punpcklbw mm5, mm7
@@ -330,7 +329,7 @@ sym(vp8_sad4x4_mmx):
psrlq mm0, 32
paddw mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -417,7 +416,7 @@ x16x8sad_mmx_loop:
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 53240bbf1..39ed79604 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -1,17 +1,16 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
-%idefine QWORD
-
;unsigned int vp8_sad16x16_wmt(
; unsigned char *src_ptr,
; int src_stride,
@@ -74,7 +73,7 @@ x16x16sad_wmt_loop:
psrldq xmm7, 8
paddw xmm0, xmm7
- movd rax, xmm0
+ movq rax, xmm0
; begin epilog
pop rdi
@@ -112,7 +111,7 @@ sym(vp8_sad8x16_wmt):
x8x16sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x16sad_wmt_early_exit
@@ -134,7 +133,7 @@ x8x16sad_wmt_loop:
cmp rsi, rcx
jne x8x16sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x16sad_wmt_early_exit:
@@ -173,7 +172,7 @@ sym(vp8_sad8x8_wmt):
x8x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x8sad_wmt_early_exit
@@ -189,7 +188,7 @@ x8x8sad_wmt_loop:
cmp rsi, rcx
jne x8x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x8sad_wmt_early_exit:
; begin epilog
@@ -220,11 +219,11 @@ sym(vp8_sad4x4_wmt):
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
- movd mm0, QWORD PTR [rsi]
- movd mm1, QWORD PTR [rdi]
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rdi]
- movd mm2, QWORD PTR [rsi+rax]
- movd mm3, QWORD PTR [rdi+rdx]
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
@@ -233,19 +232,19 @@ sym(vp8_sad4x4_wmt):
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
- movd mm4, QWORD PTR [rsi]
+ movd mm4, DWORD PTR [rsi]
- movd mm5, QWORD PTR [rdi]
- movd mm6, QWORD PTR [rsi+rax]
+ movd mm5, DWORD PTR [rdi]
+ movd mm6, DWORD PTR [rsi+rax]
- movd mm7, QWORD PTR [rdi+rdx]
+ movd mm7, DWORD PTR [rdi+rdx]
punpcklbw mm4, mm6
punpcklbw mm5, mm7
psadbw mm4, mm5
paddw mm0, mm4
- movd rax, mm0
+ movq rax, mm0
; begin epilog
pop rdi
@@ -282,7 +281,7 @@ sym(vp8_sad16x8_wmt):
x16x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x16x8sad_wmt_early_exit
@@ -316,7 +315,7 @@ x16x8sad_wmt_loop:
cmp rsi, rcx
jne x16x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x16x8sad_wmt_early_exit:
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 38cc02957..1b7293c20 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -1,32 +1,31 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
-%idefine QWORD
-
%macro PROCESS_16X2X3 1
%if %1
- movdqa xmm0, [rsi]
- lddqu xmm5, [rdi]
- lddqu xmm6, [rdi+1]
- lddqu xmm7, [rdi+2]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm5, XMMWORD PTR [rdi]
+ lddqu xmm6, XMMWORD PTR [rdi+1]
+ lddqu xmm7, XMMWORD PTR [rdi+2]
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
- movdqa xmm0, [rsi]
- lddqu xmm1, [rdi]
- lddqu xmm2, [rdi+1]
- lddqu xmm3, [rdi+2]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm1, XMMWORD PTR [rdi]
+ lddqu xmm2, XMMWORD PTR [rdi+1]
+ lddqu xmm3, XMMWORD PTR [rdi+2]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
@@ -36,10 +35,10 @@
paddw xmm6, xmm2
paddw xmm7, xmm3
%endif
- movdqa xmm0, QWORD PTR [rsi+rax]
- lddqu xmm1, QWORD PTR [rdi+rdx]
- lddqu xmm2, QWORD PTR [rdi+rdx+1]
- lddqu xmm3, QWORD PTR [rdi+rdx+2]
+ movdqa xmm0, XMMWORD PTR [rsi+rax]
+ lddqu xmm1, XMMWORD PTR [rdi+rdx]
+ lddqu xmm2, XMMWORD PTR [rdi+rdx+1]
+ lddqu xmm3, XMMWORD PTR [rdi+rdx+2]
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
@@ -55,19 +54,19 @@
%macro PROCESS_8X2X3 1
%if %1
- movq mm0, [rsi]
- movq mm5, [rdi]
- movq mm6, [rdi+1]
- movq mm7, [rdi+2]
+ movq mm0, QWORD PTR [rsi]
+ movq mm5, QWORD PTR [rdi]
+ movq mm6, QWORD PTR [rdi+1]
+ movq mm7, QWORD PTR [rdi+2]
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
- movq mm0, [rsi]
- movq mm1, [rdi]
- movq mm2, [rdi+1]
- movq mm3, [rdi+2]
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rdi]
+ movq mm2, QWORD PTR [rdi+1]
+ movq mm3, QWORD PTR [rdi+2]
psadbw mm1, mm0
psadbw mm2, mm0
@@ -104,45 +103,45 @@
%macro PROCESS_16X2X4 1
%if %1
- movdqa xmm0, [rsi]
- lddqu xmm4, [rcx]
- lddqu xmm5, [rdx]
- lddqu xmm6, [rbx]
- lddqu xmm7, [rdi]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm4, XMMWORD PTR [rcx]
+ lddqu xmm5, XMMWORD PTR [rdx]
+ lddqu xmm6, XMMWORD PTR [rbx]
+ lddqu xmm7, XMMWORD PTR [rdi]
psadbw xmm4, xmm0
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
- movdqa xmm0, [rsi]
- lddqu xmm1, [rcx]
- lddqu xmm2, [rdx]
- lddqu xmm3, [rbx]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm1, XMMWORD PTR [rcx]
+ lddqu xmm2, XMMWORD PTR [rdx]
+ lddqu xmm3, XMMWORD PTR [rbx]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
- lddqu xmm1, [rdi]
+ lddqu xmm1, XMMWORD PTR [rdi]
paddw xmm5, xmm2
paddw xmm6, xmm3
psadbw xmm1, xmm0
paddw xmm7, xmm1
%endif
- movdqa xmm0, QWORD PTR [rsi+rax]
- lddqu xmm1, QWORD PTR [rcx+rbp]
- lddqu xmm2, QWORD PTR [rdx+rbp]
- lddqu xmm3, QWORD PTR [rbx+rbp]
+ movdqa xmm0, XMMWORD PTR [rsi+rax]
+ lddqu xmm1, XMMWORD PTR [rcx+rbp]
+ lddqu xmm2, XMMWORD PTR [rdx+rbp]
+ lddqu xmm3, XMMWORD PTR [rbx+rbp]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
- lddqu xmm1, QWORD PTR [rdi+rbp]
+ lddqu xmm1, XMMWORD PTR [rdi+rbp]
paddw xmm5, xmm2
paddw xmm6, xmm3
@@ -161,28 +160,28 @@
%macro PROCESS_8X2X4 1
%if %1
- movq mm0, [rsi]
- movq mm4, [rcx]
- movq mm5, [rdx]
- movq mm6, [rbx]
- movq mm7, [rdi]
+ movq mm0, QWORD PTR [rsi]
+ movq mm4, QWORD PTR [rcx]
+ movq mm5, QWORD PTR [rdx]
+ movq mm6, QWORD PTR [rbx]
+ movq mm7, QWORD PTR [rdi]
psadbw mm4, mm0
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
- movq mm0, [rsi]
- movq mm1, [rcx]
- movq mm2, [rdx]
- movq mm3, [rbx]
+ movq mm0, QWORD PTR [rsi]
+ movq mm1, QWORD PTR [rcx]
+ movq mm2, QWORD PTR [rdx]
+ movq mm3, QWORD PTR [rbx]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm4, mm1
- movq mm1, [rdi]
+ movq mm1, QWORD PTR [rdi]
paddw mm5, mm2
paddw mm6, mm3
@@ -429,20 +428,20 @@ sym(vp8_sad4x4x3_sse3):
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
- movd mm0, QWORD PTR [rsi]
- movd mm1, QWORD PTR [rdi]
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rdi]
- movd mm2, QWORD PTR [rsi+rax]
- movd mm3, QWORD PTR [rdi+rdx]
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
- movd mm4, QWORD PTR [rdi+1]
- movd mm5, QWORD PTR [rdi+2]
+ movd mm4, DWORD PTR [rdi+1]
+ movd mm5, DWORD PTR [rdi+2]
- movd mm2, QWORD PTR [rdi+rdx+1]
- movd mm3, QWORD PTR [rdi+rdx+2]
+ movd mm2, DWORD PTR [rdi+rdx+1]
+ movd mm3, DWORD PTR [rdi+rdx+2]
psadbw mm1, mm0
@@ -457,24 +456,24 @@ sym(vp8_sad4x4x3_sse3):
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
- movd mm0, QWORD PTR [rsi]
- movd mm2, QWORD PTR [rdi]
+ movd mm0, DWORD PTR [rsi]
+ movd mm2, DWORD PTR [rdi]
- movd mm3, QWORD PTR [rsi+rax]
- movd mm6, QWORD PTR [rdi+rdx]
+ movd mm3, DWORD PTR [rsi+rax]
+ movd mm6, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm3
punpcklbw mm2, mm6
- movd mm3, QWORD PTR [rdi+1]
- movd mm7, QWORD PTR [rdi+2]
+ movd mm3, DWORD PTR [rdi+1]
+ movd mm7, DWORD PTR [rdi+2]
psadbw mm2, mm0
paddw mm1, mm2
- movd mm2, QWORD PTR [rdi+rdx+1]
- movd mm6, QWORD PTR [rdi+rdx+2]
+ movd mm2, DWORD PTR [rdi+rdx+1]
+ movd mm6, DWORD PTR [rdi+rdx+2]
punpcklbw mm3, mm2
punpcklbw mm7, mm6
@@ -529,7 +528,7 @@ sym(vp8_sad16x16_sse3):
vp8_sad16x16_sse3_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg vp8_sad16x16_early_exit
@@ -563,7 +562,7 @@ vp8_sad16x16_sse3_loop:
cmp rsi, rcx
jne vp8_sad16x16_sse3_loop
- movd rax, mm7
+ movq rax, mm7
vp8_sad16x16_early_exit:
@@ -845,23 +844,23 @@ sym(vp8_sad4x4x4d_sse3):
xchg rbx, rax
- movd mm0, QWORD PTR [rsi]
- movd mm1, QWORD PTR [rcx]
+ movd mm0, DWORD PTR [rsi]
+ movd mm1, DWORD PTR [rcx]
- movd mm2, QWORD PTR [rsi+rax]
- movd mm3, QWORD PTR [rcx+rbp]
+ movd mm2, DWORD PTR [rsi+rax]
+ movd mm3, DWORD PTR [rcx+rbp]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
- movd mm4, QWORD PTR [rdx]
- movd mm5, QWORD PTR [rbx]
+ movd mm4, DWORD PTR [rdx]
+ movd mm5, DWORD PTR [rbx]
- movd mm6, QWORD PTR [rdi]
- movd mm2, QWORD PTR [rdx+rbp]
+ movd mm6, DWORD PTR [rdi]
+ movd mm2, DWORD PTR [rdx+rbp]
- movd mm3, QWORD PTR [rbx+rbp]
- movd mm7, QWORD PTR [rdi+rbp]
+ movd mm3, DWORD PTR [rbx+rbp]
+ movd mm7, DWORD PTR [rdi+rbp]
psadbw mm1, mm0
@@ -884,17 +883,17 @@ sym(vp8_sad4x4x4d_sse3):
lea rdi, [rdi+rbp*2]
- movd mm0, QWORD PTR [rsi]
- movd mm2, QWORD PTR [rcx]
+ movd mm0, DWORD PTR [rsi]
+ movd mm2, DWORD PTR [rcx]
- movd mm3, QWORD PTR [rsi+rax]
- movd mm7, QWORD PTR [rcx+rbp]
+ movd mm3, DWORD PTR [rsi+rax]
+ movd mm7, DWORD PTR [rcx+rbp]
punpcklbw mm0, mm3
punpcklbw mm2, mm7
- movd mm3, QWORD PTR [rdx]
- movd mm7, QWORD PTR [rbx]
+ movd mm3, DWORD PTR [rdx]
+ movd mm7, DWORD PTR [rbx]
psadbw mm2, mm0
mov rax, rbp
@@ -905,8 +904,8 @@ sym(vp8_sad4x4x4d_sse3):
paddw mm1, mm2
movd [rsi], mm1
- movd mm2, QWORD PTR [rdx+rax]
- movd mm1, QWORD PTR [rbx+rax]
+ movd mm2, DWORD PTR [rdx+rax]
+ movd mm1, DWORD PTR [rbx+rax]
punpcklbw mm3, mm2
punpcklbw mm7, mm1
@@ -914,8 +913,8 @@ sym(vp8_sad4x4x4d_sse3):
psadbw mm3, mm0
psadbw mm7, mm0
- movd mm2, QWORD PTR [rdi]
- movd mm1, QWORD PTR [rdi+rax]
+ movd mm2, DWORD PTR [rdi]
+ movd mm1, DWORD PTR [rdi+rax]
paddw mm3, mm4
paddw mm7, mm5
diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm
new file mode 100644
index 000000000..21e2e5007
--- /dev/null
+++ b/vp8/encoder/x86/sad_sse4.asm
@@ -0,0 +1,353 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
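+; The PROCESS_* macros below are built around SSE4.1 mpsadbw, which
+; SADs one 4-byte group of its second operand against eight
+; consecutive 4-byte windows of its first. Immediates 0x0 and 0x5
+; select the two halves of an 8-byte block (0x5 also advances the
+; reference window base by 4), so summing both results gives the
+; 8-byte SAD at eight candidate offsets; the punpcklqdq shuffles
+; stage the 24 reference bytes those windows span.
+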
+%macro PROCESS_16X2X8 1
+%if %1
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movq xmm1, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ movq xmm2, MMWORD PTR [rdi+16]
+ punpcklqdq xmm1, xmm3
+ punpcklqdq xmm3, xmm2
+
+ movdqa xmm2, xmm1
+ mpsadbw xmm1, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+
+ psrldq xmm0, 8
+
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0
+ mpsadbw xmm4, xmm0, 0x5
+
+ paddw xmm1, xmm2
+ paddw xmm1, xmm3
+ paddw xmm1, xmm4
+%else
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ movq xmm2, MMWORD PTR [rdi+16]
+ punpcklqdq xmm5, xmm3
+ punpcklqdq xmm3, xmm2
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+
+ psrldq xmm0, 8
+
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0
+ mpsadbw xmm4, xmm0, 0x5
+
+ paddw xmm5, xmm2
+ paddw xmm5, xmm3
+ paddw xmm5, xmm4
+
+ paddw xmm1, xmm5
+%endif
+ movdqa xmm0, XMMWORD PTR [rsi + rax]
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ movq xmm2, MMWORD PTR [rdi+ rdx+16]
+ punpcklqdq xmm5, xmm3
+ punpcklqdq xmm3, xmm2
+
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rdx*2]
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+
+ psrldq xmm0, 8
+ movdqa xmm4, xmm3
+ mpsadbw xmm3, xmm0, 0x0
+ mpsadbw xmm4, xmm0, 0x5
+
+ paddw xmm5, xmm2
+ paddw xmm5, xmm3
+ paddw xmm5, xmm4
+
+ paddw xmm1, xmm5
+%endmacro
+
+%macro PROCESS_8X2X8 1
+%if %1
+ movq xmm0, MMWORD PTR [rsi]
+ movq xmm1, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm1, xmm3
+
+ movdqa xmm2, xmm1
+ mpsadbw xmm1, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+ paddw xmm1, xmm2
+%else
+ movq xmm0, MMWORD PTR [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm5, xmm3
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+ paddw xmm5, xmm2
+
+ paddw xmm1, xmm5
+%endif
+ movq xmm0, MMWORD PTR [rsi + rax]
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ punpcklqdq xmm5, xmm3
+
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rdx*2]
+
+ movdqa xmm2, xmm5
+ mpsadbw xmm5, xmm0, 0x0
+ mpsadbw xmm2, xmm0, 0x5
+ paddw xmm5, xmm2
+
+ paddw xmm1, xmm5
+%endmacro
+
+%macro PROCESS_4X2X8 1
+%if %1
+ movd xmm0, [rsi]
+ movq xmm1, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm1, xmm3
+
+ mpsadbw xmm1, xmm0, 0x0
+%else
+ movd xmm0, [rsi]
+ movq xmm5, MMWORD PTR [rdi]
+ movq xmm3, MMWORD PTR [rdi+8]
+ punpcklqdq xmm5, xmm3
+
+ mpsadbw xmm5, xmm0, 0x0
+
+ paddw xmm1, xmm5
+%endif
+ movd xmm0, [rsi + rax]
+ movq xmm5, MMWORD PTR [rdi+ rdx]
+ movq xmm3, MMWORD PTR [rdi+ rdx+8]
+ punpcklqdq xmm5, xmm3
+
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rdx*2]
+
+ mpsadbw xmm5, xmm0, 0x0
+
+ paddw xmm1, xmm5
+%endmacro
+
+
+;void vp8_sad16x16x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array);
+global sym(vp8_sad16x16x8_sse4)
+sym(vp8_sad16x16x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqu XMMWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_sad16x8x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp8_sad16x8x8_sse4)
+sym(vp8_sad16x8x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqu XMMWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_sad8x8x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp8_sad8x8x8_sse4)
+sym(vp8_sad8x8x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqu XMMWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_sad8x16x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp8_sad8x16x8_sse4)
+sym(vp8_sad8x16x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ mov rdi, arg(4) ;Results
+ movdqu XMMWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_sad4x4x8_sse4(
+; const unsigned char *src_ptr,
+; int src_stride,
+; const unsigned char *ref_ptr,
+; int ref_stride,
+; unsigned short *sad_array
+;);
+global sym(vp8_sad4x4x8_sse4)
+sym(vp8_sad4x4x8_sse4):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
+
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
+
+ PROCESS_4X2X8 1
+ PROCESS_4X2X8 0
+
+ mov rdi, arg(4) ;Results
+ movdqu XMMWORD PTR [rdi], xmm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 1bb956121..69c5eaedc 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -1,32 +1,31 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
-%idefine QWORD
-
%macro PROCESS_16X2X3 1
%if %1
- movdqa xmm0, [rsi]
- lddqu xmm5, [rdi]
- lddqu xmm6, [rdi+1]
- lddqu xmm7, [rdi+2]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm5, XMMWORD PTR [rdi]
+ lddqu xmm6, XMMWORD PTR [rdi+1]
+ lddqu xmm7, XMMWORD PTR [rdi+2]
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
- movdqa xmm0, [rsi]
- lddqu xmm1, [rdi]
- lddqu xmm2, [rdi+1]
- lddqu xmm3, [rdi+2]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ lddqu xmm1, XMMWORD PTR [rdi]
+ lddqu xmm2, XMMWORD PTR [rdi+1]
+ lddqu xmm3, XMMWORD PTR [rdi+2]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
@@ -36,10 +35,10 @@
paddw xmm6, xmm2
paddw xmm7, xmm3
%endif
- movdqa xmm0, QWORD PTR [rsi+rax]
- lddqu xmm1, QWORD PTR [rdi+rdx]
- lddqu xmm2, QWORD PTR [rdi+rdx+1]
- lddqu xmm3, QWORD PTR [rdi+rdx+2]
+ movdqa xmm0, XMMWORD PTR [rsi+rax]
+ lddqu xmm1, XMMWORD PTR [rdi+rdx]
+ lddqu xmm2, XMMWORD PTR [rdi+rdx+1]
+ lddqu xmm3, XMMWORD PTR [rdi+rdx+2]
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
@@ -55,9 +54,9 @@
%macro PROCESS_16X2X3_OFFSET 2
%if %1
- movdqa xmm0, [rsi]
- movdqa xmm4, [rdi]
- movdqa xmm7, [rdi+16]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movdqa xmm4, XMMWORD PTR [rdi]
+ movdqa xmm7, XMMWORD PTR [rdi+16]
movdqa xmm5, xmm7
palignr xmm5, xmm4, %2
@@ -71,9 +70,9 @@
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
- movdqa xmm0, [rsi]
- movdqa xmm4, [rdi]
- movdqa xmm3, [rdi+16]
+ movdqa xmm0, XMMWORD PTR [rsi]
+ movdqa xmm4, XMMWORD PTR [rdi]
+ movdqa xmm3, XMMWORD PTR [rdi+16]
movdqa xmm1, xmm3
palignr xmm1, xmm4, %2
@@ -91,9 +90,9 @@
paddw xmm6, xmm2
paddw xmm7, xmm3
%endif
- movdqa xmm0, QWORD PTR [rsi+rax]
- movdqa xmm4, QWORD PTR [rdi+rdx]
- movdqa xmm3, QWORD PTR [rdi+rdx+16]
+ movdqa xmm0, XMMWORD PTR [rsi+rax]
+ movdqa xmm4, XMMWORD PTR [rdi+rdx]
+ movdqa xmm3, XMMWORD PTR [rdi+rdx+16]
movdqa xmm1, xmm3
palignr xmm1, xmm4, %2
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index ce3e61066..a47e1f0d6 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -1,20 +1,21 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
-; unsigned short *diff, unsigned char *Predictor,
+; short *diff, unsigned char *Predictor,
; int pitch);
global sym(vp8_subtract_b_mmx_impl)
-sym(vp8_subtract_b_mmx_impl)
+sym(vp8_subtract_b_mmx_impl):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
@@ -150,7 +151,7 @@ submby_loop:
;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
global sym(vp8_subtract_mbuv_mmx)
-sym(vp8_subtract_mbuv_mmx)
+sym(vp8_subtract_mbuv_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
new file mode 100644
index 000000000..3fb23d097
--- /dev/null
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -0,0 +1,356 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
+; short *diff, unsigned char *Predictor,
+; int pitch);
+global sym(vp8_subtract_b_sse2_impl)
+sym(vp8_subtract_b_sse2_impl):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdi, arg(2) ;diff
+ mov rax, arg(3) ;Predictor
+ mov rsi, arg(0) ;z
+ movsxd rdx, dword ptr arg(1);src_stride;
+ movsxd rcx, dword ptr arg(4);pitch
+ pxor mm7, mm7
+
+ movd mm0, [rsi]
+ movd mm1, [rax]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi], mm0
+
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*2], mm0
+
+ movd mm0, [rsi+rdx*2]
+ movd mm1, [rax+rcx*2]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*4], mm0
+
+ lea rsi, [rsi+rdx*2]
+ lea rcx, [rcx+rcx*2]
+
+ movd mm0, [rsi+rdx]
+ movd mm1, [rax+rcx]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm0, mm1
+ movq MMWORD PTR [rdi+rcx*2], mm0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
+global sym(vp8_subtract_mby_sse2)
+sym(vp8_subtract_mby_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(1) ;src
+ mov rdi, arg(0) ;diff
+
+ mov rax, arg(2) ;pred
+ movsxd rdx, dword ptr arg(3) ;stride
+
+ mov rcx, 8 ; do two lines at a time
+
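+ ; psubb wraps modulo 256, so the byte subtract below loses the
+ ; borrow. Biasing both inputs by 0x80 (t80) reinterprets them as
+ ; signed bytes, pcmpgtb then yields a 0x00/0xFF sign mask, and the
+ ; punpcklbw/punpckhbw pair interleaves difference and mask to
+ ; sign-extend each result to a 16-bit short.
+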
+submby_loop:
+ movdqa xmm0, XMMWORD PTR [rsi] ; src
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ movdqa xmm4, XMMWORD PTR [rsi + rdx]
+ movdqa xmm5, XMMWORD PTR [rax + 16]
+
+ movdqa xmm6, xmm4
+ psubb xmm4, xmm5
+
+ pxor xmm5, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm6, [GLOBAL(t80)]
+ pcmpgtb xmm5, xmm6 ; obtain sign information
+
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm5
+ punpcklbw xmm4, xmm5 ; put sign back to subtraction
+ punpckhbw xmm6, xmm7 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi +32], xmm4
+ movdqa XMMWORD PTR [rdi +48], xmm6
+
+ add rdi, 64
+ add rax, 32
+ lea rsi, [rsi+rdx*2]
+
+ sub rcx, 1
+ jnz submby_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+global sym(vp8_subtract_mbuv_sse2)
+sym(vp8_subtract_mbuv_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rdi, arg(0) ;diff
+ mov rax, arg(3) ;pred
+ mov rsi, arg(1) ;z = usrc
+ add rdi, 256*2 ;diff = diff + 256 (shorts)
+ add rax, 256 ;Predictor = pred + 256
+ movsxd rdx, dword ptr arg(4) ;stride;
+ lea rcx, [rdx + rdx*2]
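+ ; The 16x16 Y plane fills the first 256 shorts of diff and the
+ ; first 256 bytes of pred, so the U block starts at offset 256;
+ ; rcx caches 3*stride for addressing rows 3 and 7.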
+
+ ;u
+ ;line 0 1
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ ;line 2 3
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+16] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 32], xmm0
+ movdqa XMMWORD PTR [rdi + 48], xmm2
+
+ ;line 4 5
+ lea rsi, [rsi + rdx*4]
+
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 64], xmm0
+ movdqa XMMWORD PTR [rdi + 80], xmm2
+
+ ;line 6 7
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 96], xmm0
+ movdqa XMMWORD PTR [rdi + 112], xmm2
+
+ ;v
+ mov rsi, arg(2) ;z = vsrc
+ add rdi, 64*2 ;diff = diff + 320 (shorts)
+ add rax, 64 ;Predictor = pred + 320
+
+ ;line 0 1
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi], xmm0
+ movdqa XMMWORD PTR [rdi +16], xmm2
+
+ ;line 2 3
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+16] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 32], xmm0
+ movdqa XMMWORD PTR [rdi + 48], xmm2
+
+ ;line 4 5
+ lea rsi, [rsi + rdx*4]
+
+ movq xmm0, MMWORD PTR [rsi] ; src
+ movq xmm2, MMWORD PTR [rsi+rdx]
+ movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 64], xmm0
+ movdqa XMMWORD PTR [rdi + 80], xmm2
+
+ ;line 6 7
+ movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
+ movq xmm2, MMWORD PTR [rsi+rcx]
+ movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
+ punpcklqdq xmm0, xmm2
+
+ movdqa xmm2, xmm0
+ psubb xmm0, xmm1 ; subtraction with sign missed
+
+ pxor xmm1, [GLOBAL(t80)] ;convert to signed values
+ pxor xmm2, [GLOBAL(t80)]
+ pcmpgtb xmm1, xmm2 ; obtain sign information
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ punpcklbw xmm0, xmm1 ; put sign back to subtraction
+ punpckhbw xmm2, xmm3 ; put sign back to subtraction
+
+ movdqa XMMWORD PTR [rdi + 96], xmm0
+ movdqa XMMWORD PTR [rdi + 112], xmm2
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+t80:
+ times 16 db 0x80
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index d0da82ad4..67a9b4d3e 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -497,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx):
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
; begin epilog
@@ -555,7 +556,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx):
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm5, mm1
@@ -579,7 +580,7 @@ filter_block2d_bil4x4_var_mmx_loop:
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm3, mm5 ;
@@ -591,7 +592,7 @@ filter_block2d_bil4x4_var_mmx_loop:
paddw mm1, mm3 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movd mm3, [rdi] ;
@@ -709,10 +710,10 @@ sym(vp8_filter_block2d_bil_var_mmx):
paddw mm1, mm3 ;
paddw mm2, mm4 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
- paddw mm2, [mmx_bi_rd GLOBAL] ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm5, mm1
@@ -748,10 +749,10 @@ filter_block2d_bil_var_mmx_loop:
paddw mm1, mm3 ;
paddw mm2, mm4 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
- paddw mm2, [mmx_bi_rd GLOBAL] ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm3, mm5 ;
@@ -772,8 +773,8 @@ filter_block2d_bil_var_mmx_loop:
paddw mm1, mm3 ;
paddw mm2, mm4 ;
- paddw mm1, [mmx_bi_rd GLOBAL] ;
- paddw mm2, [mmx_bi_rd GLOBAL] ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
psraw mm2, mmx_filter_shift ;
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 7e5ee284b..cefa0a956 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1,10 +1,11 @@
;
-; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
;
@@ -57,7 +58,7 @@ NEXTROW:
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
- movd rax,xmm4
+ movq rax,xmm4
; begin epilog
@@ -470,7 +471,7 @@ sym(vp8_get8x8var_sse2):
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
- movd rdx, xmm7
+ movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx
@@ -531,7 +532,7 @@ sym(vp8_filter_block2d_bil_var_sse2):
pmullw xmm3, [rax+16] ;
paddw xmm1, xmm3 ;
- paddw xmm1, [xmm_bi_rd GLOBAL] ;
+ paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
psraw xmm1, xmm_filter_shift ;
movdqa xmm5, xmm1
@@ -553,7 +554,7 @@ filter_block2d_bil_var_sse2_loop:
pmullw xmm3, [rax+16] ;
paddw xmm1, xmm3 ;
- paddw xmm1, [xmm_bi_rd GLOBAL] ;
+ paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
psraw xmm1, xmm_filter_shift ;
movdqa xmm3, xmm5 ;
@@ -564,7 +565,7 @@ filter_block2d_bil_var_sse2_loop:
pmullw xmm1, [rdx+16] ;
paddw xmm1, xmm3 ;
- paddw xmm1, [xmm_bi_rd GLOBAL] ;
+ paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
psraw xmm1, xmm_filter_shift ;
movq xmm3, QWORD PTR [rdi] ;
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 4a5b25b0d..2df73a635 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -14,7 +15,7 @@
extern void filter_block1d_h6_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
@@ -24,7 +25,7 @@ extern void filter_block1d_h6_mmx
);
extern void filter_block1d_v6_mmx
(
- short *src_ptr,
+ const short *src_ptr,
unsigned char *output_ptr,
unsigned int pixels_per_line,
unsigned int pixel_step,
@@ -36,34 +37,34 @@ extern void filter_block1d_v6_mmx
extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern unsigned int vp8_get4x4sse_cs_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
const short *HFilter,
const short *VFilter,
@@ -72,9 +73,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
);
extern void vp8_filter_block2d_bil_var_mmx
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
unsigned int Height,
const short *HFilter,
@@ -125,9 +126,9 @@ void vp8_test_get_mb_ss(void)
unsigned int vp8_get16x16var_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned *SSE,
unsigned *SUM
@@ -156,9 +157,9 @@ unsigned int vp8_get16x16var_mmx(
unsigned int vp8_variance4x4_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -172,9 +173,9 @@ unsigned int vp8_variance4x4_mmx(
}
unsigned int vp8_variance8x8_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -189,9 +190,9 @@ unsigned int vp8_variance8x8_mmx(
}
unsigned int vp8_mse16x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -211,9 +212,9 @@ unsigned int vp8_mse16x16_mmx(
unsigned int vp8_variance16x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
int *sse)
{
@@ -233,9 +234,9 @@ unsigned int vp8_variance16x16_mmx(
}
unsigned int vp8_variance16x8_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -254,9 +255,9 @@ unsigned int vp8_variance16x8_mmx(
unsigned int vp8_variance8x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -295,11 +296,11 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
unsigned int vp8_sub_pixel_variance4x4_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse)
@@ -319,11 +320,11 @@ unsigned int vp8_sub_pixel_variance4x4_mmx
unsigned int vp8_sub_pixel_variance8x8_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -343,11 +344,11 @@ unsigned int vp8_sub_pixel_variance8x8_mmx
unsigned int vp8_sub_pixel_variance16x16_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -382,11 +383,11 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
}
unsigned int vp8_sub_pixel_mse16x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -397,11 +398,11 @@ unsigned int vp8_sub_pixel_mse16x16_mmx(
unsigned int vp8_sub_pixel_variance16x8_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -434,11 +435,11 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
unsigned int vp8_sub_pixel_variance8x16_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
int *sse
)
@@ -456,9 +457,9 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
}
unsigned int vp8_i_variance16x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -479,9 +480,9 @@ unsigned int vp8_i_variance16x16_mmx(
}
unsigned int vp8_i_variance8x16_mmx(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -500,11 +501,11 @@ unsigned int vp8_i_variance8x16_mmx(
unsigned int vp8_i_sub_pixel_variance16x16_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -559,11 +560,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_mmx
unsigned int vp8_i_sub_pixel_variance8x16_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -594,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx
*sse = xxsum0;
return (xxsum0 - ((xsum0 * xsum0) >> 7));
}
+
+
+unsigned int vp8_variance_halfpixvar16x16_h_mmx(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_mmx(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
+ ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse)
+{
+ return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
+ ref_ptr, recon_stride, sse);
+}
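
The three wrappers above reuse the generic sub-pixel path for the common half-pel cases: offset 4 in the 8-phase bilinear filter table is exactly half a pixel, so (4,0), (0,4) and (4,4) select horizontal, vertical and diagonal half-pel filtering without any new assembly. The return expression used throughout these files is the usual variance identity var = SSE - sum^2/N, with N a power of two so the division becomes a shift (>> 8 for 256 pixels, >> 7 for 128). A scalar model of what the SIMD kernels compute, as a sketch (the helper name and explicit shift parameter are illustrative, not part of the tree):

    #include <stdint.h>

    /* Scalar reference for the variance the SIMD kernels compute.
     * shift = log2(w*h), so the final term is sum*sum / (w*h).
     * The product is widened here; the tree's code does it in 32 bits. */
    static unsigned int variance_wxh_c(const unsigned char *src, int src_stride,
                                       const unsigned char *ref, int ref_stride,
                                       int w, int h, int shift,
                                       unsigned int *sse)
    {
        int r, c, sum = 0;
        unsigned int sse_acc = 0;

        for (r = 0; r < h; r++)
        {
            for (c = 0; c < w; c++)
            {
                const int diff = src[c] - ref[c];
                sum += diff;
                sse_acc += (unsigned int)(diff * diff);
            }

            src += src_stride;
            ref += ref_stride;
        }

        *sse = sse_acc;
        return sse_acc - (unsigned int)(((int64_t)sum * sum) >> shift);
    }

For example, variance_wxh_c(src, ss, ref, rs, 16, 16, 8, &sse) mirrors the >> 8 in vp8_variance16x16_mmx, and shift 7 mirrors the >> 7 of the 8x16 cases above.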
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index ea80753bd..006e0a24a 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -12,16 +13,16 @@
#include "pragmas.h"
#include "vpx_ports/mem.h"
-extern void filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d_v6_mmx(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_v6_sse2(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
const short *HFilter,
const short *VFilter,
@@ -31,9 +32,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
extern unsigned int vp8_get4x4var_mmx
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
@@ -41,38 +42,38 @@ extern unsigned int vp8_get4x4var_mmx
unsigned int vp8_get_mb_ss_sse2
(
- short *src_ptr
+ const short *src_ptr
);
unsigned int vp8_get16x16var_sse2
(
- unsigned char *src_ptr,
- int source_stride,
- unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
);
unsigned int vp8_get16x16pred_error_sse2
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_stride
);
unsigned int vp8_get8x8var_sse2
(
- unsigned char *src_ptr,
- int source_stride,
- unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
);
void vp8_filter_block2d_bil_var_sse2
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
unsigned int Height,
const short *HFilter,
@@ -82,9 +83,9 @@ void vp8_filter_block2d_bil_var_sse2
);
void vp8_half_horiz_vert_variance16x_h_sse2
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
unsigned int Height,
int *sum,
@@ -92,9 +93,9 @@ void vp8_half_horiz_vert_variance16x_h_sse2
);
void vp8_half_horiz_variance16x_h_sse2
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
unsigned int Height,
int *sum,
@@ -102,9 +103,9 @@ void vp8_half_horiz_variance16x_h_sse2
);
void vp8_half_vert_variance16x_h_sse2
(
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int ref_pixels_per_line,
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
unsigned int Height,
int *sum,
@@ -114,9 +115,9 @@ void vp8_half_vert_variance16x_h_sse2
DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
unsigned int vp8_variance4x4_wmt(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride)
{
unsigned int var;
@@ -131,9 +132,9 @@ unsigned int vp8_variance4x4_wmt(
unsigned int vp8_variance8x8_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride)
{
unsigned int var;
@@ -148,9 +149,9 @@ unsigned int vp8_variance8x8_wmt
unsigned int vp8_variance16x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -163,9 +164,9 @@ unsigned int vp8_variance16x16_wmt
return (sse0 - ((sum0 * sum0) >> 8));
}
unsigned int vp8_mse16x16_wmt(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -181,9 +182,9 @@ unsigned int vp8_mse16x16_wmt(
unsigned int vp8_variance16x8_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -202,9 +203,9 @@ unsigned int vp8_variance16x8_wmt
unsigned int vp8_variance8x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -238,11 +239,11 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
};
unsigned int vp8_sub_pixel_variance4x4_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -262,11 +263,11 @@ unsigned int vp8_sub_pixel_variance4x4_wmt
unsigned int vp8_sub_pixel_variance8x8_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -287,11 +288,11 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
unsigned int vp8_sub_pixel_variance16x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -363,11 +364,11 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
}
unsigned int vp8_sub_pixel_mse16x16_wmt(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -378,11 +379,11 @@ unsigned int vp8_sub_pixel_mse16x16_wmt(
unsigned int vp8_sub_pixel_variance16x8_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
    unsigned int *sse
)
@@ -416,11 +417,11 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
unsigned int vp8_sub_pixel_variance8x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -439,9 +440,9 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
}
unsigned int vp8_i_variance16x16_wmt(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -463,9 +464,9 @@ unsigned int vp8_i_variance16x16_wmt(
}
unsigned int vp8_i_variance8x16_wmt(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int source_stride,
- unsigned char *ref_ptr,
+ const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
@@ -485,11 +486,11 @@ unsigned int vp8_i_variance8x16_wmt(
unsigned int vp8_i_sub_pixel_variance16x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -500,11 +501,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_wmt
unsigned int vp8_i_sub_pixel_variance8x16_wmt
(
- unsigned char *src_ptr,
+ const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
- unsigned char *dst_ptr,
+ const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
@@ -512,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
}
+
+
+unsigned int vp8_variance_halfpixvar16x16_h_wmt(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse)
+{
+ int xsum0, xsum1;
+ unsigned int xxsum0, xxsum1;
+
+ vp8_half_horiz_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ vp8_half_horiz_variance16x_h_sse2(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 16,
+ &xsum1, &xxsum1);
+
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+ *sse = xxsum0;
+ return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_wmt(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse)
+{
+ int xsum0, xsum1;
+ unsigned int xxsum0, xxsum1;
+
+ vp8_half_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ vp8_half_vert_variance16x_h_sse2(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 16,
+ &xsum1, &xxsum1);
+
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+ *sse = xxsum0;
+ return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
+ const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse)
+{
+ int xsum0, xsum1;
+ unsigned int xxsum0, xxsum1;
+
+ vp8_half_horiz_vert_variance16x_h_sse2(
+ src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pixels_per_line, 16,
+ &xsum0, &xxsum0);
+
+ vp8_half_horiz_vert_variance16x_h_sse2(
+ src_ptr + 8, src_pixels_per_line,
+ dst_ptr + 8, dst_pixels_per_line, 16,
+ &xsum1, &xxsum1);
+
+ xsum0 += xsum1;
+ xxsum0 += xxsum1;
+ *sse = xxsum0;
+ return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
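
Each of the _wmt wrappers above invokes the 8-pixel-wide SSE2 kernel twice, once for columns 0-7 and once for columns 8-15, then folds the two partial results together. The fold is legitimate because both the sum and the sum of squares are plain accumulations over disjoint pixel sets; only the final sum^2/N correction has to be applied to the merged totals. As a sketch of that combining step (helper name illustrative; the arithmetic matches the file's):

    /* Merge two 8-wide partial (sum, SSE) pairs into one 16x16 variance.
     * 16x16 = 256 pixels, so sum^2/N becomes a shift by 8. */
    static unsigned int merge_halves(int xsum0, unsigned int xxsum0,
                                     int xsum1, unsigned int xxsum1,
                                     unsigned int *sse)
    {
        xsum0 += xsum1;
        xxsum0 += xxsum1;
        *sse = xxsum0;
        return xxsum0 - (unsigned int)((xsum0 * xsum0) >> 8);
    }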
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 35fc90c48..6bea15ebc 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -34,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
extern prototype_getmbss(vp8_get_mb_ss_mmx);
extern prototype_variance(vp8_mse16x16_mmx);
@@ -88,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx);
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
+
#undef vp8_variance_subpixmse16x16
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx
@@ -129,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
extern prototype_getmbss(vp8_get_mb_ss_sse2);
extern prototype_variance(vp8_mse16x16_wmt);
@@ -182,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2);
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt
+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
+
#undef vp8_variance_subpixmse16x16
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt
@@ -240,7 +265,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
#define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3
#undef vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4 vp8_sad16x16x4d_sse3
+#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3
#undef vp8_variance_sad16x8x4d
#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3
@@ -272,4 +297,31 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
#endif
#endif
+
+#if HAVE_SSE4_1
+extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_variance_sad16x16x8
+#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
+
+#undef vp8_variance_sad16x8x8
+#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
+
+#undef vp8_variance_sad8x16x8
+#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
+
+#undef vp8_variance_sad8x8x8
+#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
+
+#undef vp8_variance_sad4x4x8
+#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
+
+#endif
+#endif
+
#endif
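
The header hooks the new functions up in the two ways the RTCD scheme expects: the extern prototype_variance(...) lines declare the specializations, and, when the library is built without runtime CPU detection, the #undef/#define pairs rebind the generic names to the best statically-known implementation. Schematically (the foo name is hypothetical):

    /* declare the specialized implementation */
    extern prototype_variance(vp8_variance_foo_mmx);

    #if !CONFIG_RUNTIME_CPU_DETECT
    /* no dispatcher at runtime: bind the generic name directly */
    #undef  vp8_variance_foo
    #define vp8_variance_foo vp8_variance_foo_mmx
    #endif

With runtime detection enabled these macros are ignored and the binding happens instead in vp8_arch_x86_encoder_init() below.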
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index f1391ba8c..fb1b37ccb 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -17,15 +18,10 @@
#if HAVE_MMX
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
- vp8_short_fdct4x4_mmx(input, output, pitch);
- vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
+ vp8_short_fdct4x4_c(input, output, pitch);
+ vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}
-void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch)
-{
- vp8_fast_fdct4x4_mmx(input, output , pitch);
- vp8_fast_fdct4x4_mmx(input + 4, output + 16, pitch);
-}
int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
short *qcoeff_ptr, short *dequant_ptr,
@@ -33,14 +29,14 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
short *quant_ptr, short *dqcoeff_ptr);
void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
{
- short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
- short *coeff_ptr = &b->coeff[0];
- short *zbin_ptr = &b->zbin[0][0];
- short *round_ptr = &b->round[0][0];
- short *quant_ptr = &b->quant[0][0];
- short *qcoeff_ptr = d->qcoeff;
+ short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = &d->dequant[0][0];
+ short *dequant_ptr = d->dequant;
d->eob = vp8_fast_quantize_b_impl_mmx(
coeff_ptr,
@@ -86,30 +82,28 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSE2
-void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
{
- vp8_short_fdct4x4_wmt(input, output, pitch);
- vp8_short_fdct4x4_wmt(input + 4, output + 16, pitch);
+ vp8_short_fdct4x4_sse2(input, output, pitch);
+ vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
}
-int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
+int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
short *qcoeff_ptr, short *dequant_ptr,
short *scan_mask, short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d)
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
{
- short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
- short *coeff_ptr = &b->coeff[0];
- short *zbin_ptr = &b->zbin[0][0];
- short *round_ptr = &b->round[0][0];
- short *quant_ptr = &b->quant[0][0];
- short *qcoeff_ptr = d->qcoeff;
+ short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+ short *coeff_ptr = b->coeff;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
- short *dequant_ptr = &d->dequant[0][0];
+ short *dequant_ptr = d->dequant;
- d->eob = vp8_fast_quantize_b_impl_sse(
+ d->eob = vp8_fast_quantize_b_impl_sse2(
coeff_ptr,
- zbin_ptr,
qcoeff_ptr,
dequant_ptr,
scan_mask,
@@ -120,6 +114,41 @@ void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d)
);
}
+
+int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+ short *qcoeff_ptr,short *dequant_ptr,
+ const int *default_zig_zag, short *round_ptr,
+ short *quant_ptr, short *dqcoeff_ptr,
+ unsigned short zbin_oq_value,
+ short *zbin_boost_ptr);
+
+void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
+{
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *coeff_ptr = b->coeff;
+ short *zbin_ptr = b->zbin;
+ short *round_ptr = b->round;
+ short *quant_ptr = b->quant;
+ short *qcoeff_ptr = d->qcoeff;
+ short *dqcoeff_ptr = d->dqcoeff;
+ short *dequant_ptr = d->dequant;
+ short zbin_oq_value = b->zbin_extra;
+
+ d->eob = vp8_regular_quantize_b_impl_sse2(
+ coeff_ptr,
+ zbin_ptr,
+ qcoeff_ptr,
+ dequant_ptr,
+ vp8_default_zig_zag1d,
+ round_ptr,
+ quant_ptr,
+ dqcoeff_ptr,
+ zbin_oq_value,
+ zbin_boost_ptr
+ );
+}
+
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
{
@@ -136,8 +165,39 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
return vp8_mbuverror_xmm_impl(s_ptr, d_ptr);
}
+void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
+ short *diff, unsigned char *predictor,
+ int pitch);
+void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+{
+ unsigned char *z = *(be->base_src) + be->src;
+ unsigned int src_stride = be->src_stride;
+ short *diff = &be->src_diff[0];
+ unsigned char *predictor = &bd->predictor[0];
+ vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
+}
+
+#endif
+
+#if HAVE_SSSE3
+int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
+ short *qcoeff_ptr, short *dequant_ptr,
+ short *round_ptr,
+ short *quant_ptr, short *dqcoeff_ptr);
+void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
+{
+ d->eob = vp8_fast_quantize_b_impl_ssse3(
+ b->coeff,
+ d->qcoeff,
+ d->dequant,
+ b->round,
+ b->quant,
+ d->dqcoeff
+ );
+}
#endif
+
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
@@ -147,6 +207,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
int wmt_enabled = flags & HAS_SSE2;
int SSE3Enabled = flags & HAS_SSE3;
int SSSE3Enabled = flags & HAS_SSSE3;
+ int SSE4_1Enabled = flags & HAS_SSE4_1;
/* Note:
*
@@ -157,7 +218,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
-
if (mmx_enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
@@ -177,6 +237,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx;
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx;
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
@@ -186,11 +249,19 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
-
+#if 0 // new fdct
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
- cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_mmx;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_mmx;
+ cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
+ cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
+#else
+ cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
+ cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
+ cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
+
+#endif
+
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
@@ -200,12 +271,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;
+ /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
}
-
#endif
-#if HAVE_SSE2
+#if HAVE_SSE2
if (wmt_enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
@@ -225,6 +295,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt;
+ cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
+ cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt;
+ cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt;
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt;
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
@@ -235,26 +308,26 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
/* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
-#if 0
- /* short SSE2 DCT currently disabled, does not match the MMX version */
- cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_wmt;
- cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_wmt;
-#endif
- /* cpi->rtcd.fdct.fast4x4 not implemented for wmt */;
- cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_wmt;
- cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2;
+ cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
+ cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_sse2;
+ cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2;
+ cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2;
+
+ cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2;
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
- /* cpi->rtcd.encodemb.sub* not implemented for wmt */
+ cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2;
+ cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
+ cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
- cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse;
+ /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
}
-
#endif
-#if HAVE_SSE3
+#if HAVE_SSE3
if (SSE3Enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
@@ -272,16 +345,30 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
}
-
#endif
-#if HAVE_SSSE3
+#if HAVE_SSSE3
if (SSSE3Enabled)
{
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
+
+ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
+
}
+#endif
+#if HAVE_SSE4_1
+ if (SSE4_1Enabled)
+ {
+ cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
+ cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
+ cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
+ cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
+ cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
+ cpi->rtcd.search.full_search = vp8_full_search_sadx8;
+ }
#endif
+
#endif
}
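
vp8_arch_x86_encoder_init() fills the cpi->rtcd function-pointer tables in ascending ISA order: MMX first, then SSE2, SSE3, SSSE3 and now SSE4.1, so each block only overwrites the entries it actually improves and everything else keeps the best earlier binding. A minimal sketch of that shape (the mse16x16 and sad16x16x8 members and function names appear in the hunks above; treat the flag plumbing as illustrative):

    void encoder_init_sketch(VP8_COMP *cpi, int flags)
    {
        if (flags & HAS_MMX)
            cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;

        if (flags & HAS_SSE2)   /* later ISA wins: overwrite the MMX binding */
            cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;

        if (flags & HAS_SSE4_1) /* new in this change */
            cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
    }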
diff --git a/vp8/exports_dec b/vp8/exports_dec
new file mode 100644
index 000000000..100ac5c27
--- /dev/null
+++ b/vp8/exports_dec
@@ -0,0 +1,2 @@
+data vpx_codec_vp8_dx_algo
+text vpx_codec_vp8_dx
diff --git a/vp8/exports_enc b/vp8/exports_enc
new file mode 100644
index 000000000..29ff35ef7
--- /dev/null
+++ b/vp8/exports_enc
@@ -0,0 +1,2 @@
+data vpx_codec_vp8_cx_algo
+text vpx_codec_vp8_cx
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index a9efbd753..bb3f8259c 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -1,10 +1,11 @@
##
-## Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
-## Use of this source code is governed by a BSD-style license and patent
-## grant that can be found in the LICENSE file in the root of the source
-## tree. All contributing project authors may be found in the AUTHORS
-## file in the root of the source tree.
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
##
@@ -26,7 +27,6 @@ VP8_COMMON_SRCS-yes += common/onyxd.h
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
-VP8_COMMON_SRCS-yes += common/segmentation_common.c
VP8_COMMON_SRCS-yes += common/alloccommon.c
VP8_COMMON_SRCS-yes += common/blockd.c
VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
@@ -63,7 +63,6 @@ VP8_COMMON_SRCS-yes += common/recon.h
VP8_COMMON_SRCS-yes += common/reconinter.h
VP8_COMMON_SRCS-yes += common/reconintra.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
-VP8_COMMON_SRCS-yes += common/segmentation_common.h
VP8_COMMON_SRCS-yes += common/setupintrarecon.h
VP8_COMMON_SRCS-yes += common/subpixel.h
VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
@@ -97,42 +96,37 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.h
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
ifeq ($(CONFIG_POSTPROC),yes)
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
+VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
+
# common (c)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/recon_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra4x4_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/systemdependent.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/generic/systemdependent.c
-
# common (armv6)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x8_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem16x16_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dc_only_idct_add_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/iwalsh_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/filter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_v6$(ASM)
@@ -149,17 +143,12 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict16x16_neon$(ASM
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x4_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x8_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem16x16_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dc_only_idct_add_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/iwalsh_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilter_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterverticaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterverticaledge_y_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilter_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon2b_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon4b_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/reconb_neon$(ASM)
@@ -172,6 +161,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c
#
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 32c5f3b21..4f780a38c 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -13,6 +14,7 @@
#include "vpx_version.h"
#include "onyx_int.h"
#include "vpx/vp8e.h"
+#include "vp8/encoder/firstpass.h"
#include "onyx.h"
#include <stdlib.h>
#include <string.h>
@@ -52,19 +54,19 @@ static const struct extraconfig_map extracfg_map[] =
NULL,
#if !(CONFIG_REALTIME_ONLY)
VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */
- -4, /* cpu_used */
+ 0, /* cpu_used */
#else
VP8_REAL_TIME_ENCODING, /* Encoding Mode */
- -8, /* cpu_used */
+ 4, /* cpu_used */
#endif
0, /* enable_auto_alt_ref */
0, /* noise_sensitivity */
0, /* Sharpness */
- 800, /* static_thresh */
+ 0, /* static_thresh */
VP8_ONE_TOKENPARTITION, /* token_partitions */
- 0, /* arnr_max_frames */
- 0, /* arnr_strength */
- 0, /* arnr_type*/
+ 0, /* arnr_max_frames */
+ 3, /* arnr_strength */
+ 3, /* arnr_type*/
0, /* experimental mode */
}
}
@@ -109,10 +111,15 @@ update_error_state(vpx_codec_alg_priv_t *ctx,
} while(0)
#define RANGE_CHECK(p,memb,lo,hi) do {\
- if(!((p)->memb >= (lo) && (p)->memb <= hi)) \
+ if(!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
ERROR(#memb " out of range ["#lo".."#hi"]");\
} while(0)
+#define RANGE_CHECK_HI(p,memb,hi) do {\
+ if(!((p)->memb <= (hi))) \
+ ERROR(#memb " out of range [.."#hi"]");\
+ } while(0)
+
#define RANGE_CHECK_LO(p,memb,lo) do {\
if(!((p)->memb >= (lo))) \
ERROR(#memb " out of range ["#lo"..]");\
@@ -130,24 +137,24 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, g_h, 2, 16384);
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
- RANGE_CHECK(cfg, g_profile, 0, 3);
- RANGE_CHECK(cfg, rc_min_quantizer, 0, 63);
- RANGE_CHECK(cfg, rc_max_quantizer, 0, 63);
- RANGE_CHECK(cfg, g_threads, 0, 64);
+ RANGE_CHECK_HI(cfg, g_profile, 3);
+ RANGE_CHECK_HI(cfg, rc_min_quantizer, 63);
+ RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
+ RANGE_CHECK_HI(cfg, g_threads, 64);
#if !(CONFIG_REALTIME_ONLY)
- RANGE_CHECK(cfg, g_lag_in_frames, 0, 25);
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
#else
- RANGE_CHECK(cfg, g_lag_in_frames, 0, 0);
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
#endif
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR);
- RANGE_CHECK(cfg, rc_undershoot_pct, 0, 100);
- RANGE_CHECK(cfg, rc_2pass_vbr_bias_pct, 0, 100);
+ RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
+ RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
//RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile);
RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
- RANGE_CHECK(cfg, rc_dropframe_thresh, 0, 100);
- RANGE_CHECK(cfg, rc_resize_up_thresh, 0, 100);
- RANGE_CHECK(cfg, rc_resize_down_thresh, 0, 100);
+ RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
#if !(CONFIG_REALTIME_ONLY)
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
#else
@@ -166,7 +173,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
#if !(CONFIG_REALTIME_ONLY)
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
- RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 6);
+ RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
#else
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
@@ -177,29 +184,32 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
#endif
RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
- RANGE_CHECK(vp8_cfg, Sharpness, 0, 7);
- RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 25);
- RANGE_CHECK(vp8_cfg, arnr_strength, 0, 6);
- RANGE_CHECK(vp8_cfg, arnr_type, 0, 0xffffffff);
+ RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
+ RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
+ RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
+ RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
if (cfg->g_pass == VPX_RC_LAST_PASS)
{
- int n_doubles = cfg->rc_twopass_stats_in.sz / sizeof(double);
- int n_packets = cfg->rc_twopass_stats_in.sz / sizeof(FIRSTPASS_STATS);
- double frames;
+ int mb_r = (cfg->g_h + 15) / 16;
+ int mb_c = (cfg->g_w + 15) / 16;
+ size_t packet_sz = vp8_firstpass_stats_sz(mb_r * mb_c);
+ int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
+ FIRSTPASS_STATS *stats;
if (!cfg->rc_twopass_stats_in.buf)
ERROR("rc_twopass_stats_in.buf not set.");
- if (cfg->rc_twopass_stats_in.sz % sizeof(FIRSTPASS_STATS))
+ if (cfg->rc_twopass_stats_in.sz % packet_sz)
ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
- if (cfg->rc_twopass_stats_in.sz < 2 * sizeof(FIRSTPASS_STATS))
+ if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
ERROR("rc_twopass_stats_in requires at least two packets.");
- frames = ((double *)cfg->rc_twopass_stats_in.buf)[n_doubles - 1];
+ stats = (void*)((char *)cfg->rc_twopass_stats_in.buf
+ + (n_packets - 1) * packet_sz);
- if ((int)(frames + 0.5) != n_packets - 1)
+ if ((int)(stats->count + 0.5) != n_packets - 1)
ERROR("rc_twopass_stats_in missing EOS stats packet");
}
@@ -297,9 +307,9 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
//oxcf->over_shoot_pct = cfg.rc_overshoot_pct;
- oxcf->maximum_buffer_size = cfg.rc_buf_sz / 1000;
- oxcf->starting_buffer_level = cfg.rc_buf_initial_sz / 1000;
- oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz / 1000;
+ oxcf->maximum_buffer_size = cfg.rc_buf_sz;
+ oxcf->starting_buffer_level = cfg.rc_buf_initial_sz;
+ oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz;
oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct;
oxcf->two_pass_vbrmin_section = cfg.rc_2pass_vbr_minsection_pct;
@@ -774,12 +784,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
{
pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
- // TODO: ideally this timestamp should be as close as
- // possible to the prior PTS so that if a decoder uses
- // pts to schedule when to do this, we start right after
- // last frame was decoded. Maybe should be set to
- // last time stamp. Invisible frames have no duration..
- pkt.data.frame.pts --;
+ // This timestamp should be as close as possible to the
+ // prior PTS so that if a decoder uses pts to schedule when
+ // to do this, we start right after last frame was decoded.
+ // Invisible frames have no duration.
+ pkt.data.frame.pts = ((cpi->last_time_stamp_seen
+ * ctx->cfg.g_timebase.den + round)
+ / ctx->cfg.g_timebase.num / 10000000) + 1;
pkt.data.frame.duration = 0;
}
@@ -846,7 +857,9 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args)
{
+#if CONFIG_POSTPROC
vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+ (void)ctr_id;
if (data)
{
@@ -855,6 +868,12 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
}
else
return VPX_CODEC_INVALID_PARAM;
+#else
+ (void)ctx;
+ (void)ctr_id;
+ (void)args;
+ return VPX_CODEC_INCAPABLE;
+#endif
}
@@ -1044,7 +1063,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
0, /* g_lag_in_frames */
- 70, /* rc_dropframe_thresh */
+ 0, /* rc_dropframe_thresh */
0, /* rc_resize_allowed */
60, /* rc_resize_down_threshold */
30, /* rc_resize_up_threshold */
@@ -1086,9 +1105,9 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
#ifndef VERSION_STRING
#define VERSION_STRING
#endif
-vpx_codec_iface_t vpx_codec_vp8_cx_algo =
+CODEC_INTERFACE(vpx_codec_vp8_cx) =
{
- "vpx Technologies VP8 Encoder" VERSION_STRING,
+ "WebM Project VP8 Encoder" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
/* vpx_codec_caps_t caps; */
@@ -1207,7 +1226,7 @@ static vpx_codec_err_t api1_encode(vpx_codec_alg_priv_t *ctx,
vpx_codec_iface_t vpx_enc_vp8_algo =
{
- "vpx Technologies VP8 Encoder (Deprecated API)" VERSION_STRING,
+ "WebM Project VP8 Encoder (Deprecated API)" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
VPX_CODEC_CAP_ENCODER,
/* vpx_codec_caps_t caps; */
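
The RANGE_CHECK_HI macro introduced in this file exists because most of these configuration members are unsigned, so a lower bound of zero is vacuously true and a memb >= 0 comparison draws compiler warnings. Checks whose lower bound was 0 therefore switch to testing only the upper bound:

    /* Upper-bound-only check for unsigned members; a 0 lower bound
     * would always hold and trip unsigned-comparison warnings. */
    #define RANGE_CHECK_HI(p,memb,hi) do {\
            if(!((p)->memb <= (hi))) \
                ERROR(#memb " out of range [.."#hi"]");\
        } while(0)

    /* as used in validate_config() above: */
    RANGE_CHECK_HI(cfg, rc_min_quantizer, 63);

The related RANGE_CHECK rewrite, ((p)->memb == lo || (p)->memb > (lo)), keeps a genuine two-sided test while avoiding the same always-true comparison when lo is 0.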
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 19c59cd80..a85cad1b4 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
*/
@@ -40,7 +41,7 @@ typedef enum
VP8_SEG_ALG_PRIV = 256,
VP8_SEG_MAX
} mem_seg_id_t;
-#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
+#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0])))
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
@@ -169,7 +170,7 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
}
}
-static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, int id)
+static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
{
int i;
@@ -195,9 +196,6 @@ static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
ctx->pbi->fb_storage_ptr[0] = mmap_lkup(ctx, VP6_SEG_IMG0_STRG);
ctx->pbi->fb_storage_ptr[1] = mmap_lkup(ctx, VP6_SEG_IMG1_STRG);
ctx->pbi->fb_storage_ptr[2] = mmap_lkup(ctx, VP6_SEG_IMG2_STRG);
- #if CONFIG_NEW_TOKENS
- ctx->pbi->token_graph = mmap_lkup(ctx, VP6_SEG_TOKEN_GRAPH);
- #endif
#if CONFIG_POSTPROC
ctx->pbi->postproc.deblock.fragment_variances = mmap_lkup(ctx, VP6_SEG_DEBLOCKER);
ctx->pbi->fb_storage_ptr[3] = mmap_lkup(ctx, VP6_SEG_PP_IMG_STRG);
@@ -225,11 +223,12 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx)
res = vp8_mmap_alloc(&mmap);
if (!res)
+ {
vp8_init_ctx(ctx, &mmap);
- ctx->priv->alg_priv->defer_alloc = 1;
- /*post processing level initialized to do nothing */
-
+ ctx->priv->alg_priv->defer_alloc = 1;
+ /*post processing level initialized to do nothing */
+ }
}
return res;
@@ -257,12 +256,12 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data,
vpx_codec_err_t res = VPX_CODEC_OK;
{
- /*Parse from VP8 compressed data, the implies knowledge of the
- *VP8 bitsteam.
- * First 3 byte header including version, frame type and an offset
- * Next 3 bytes are image sizewith 12 bit each for width and height
+ /* Parse uncompresssed part of key frame header.
+ * 3 bytes:- including version, frame type and an offset
+ * 3 bytes:- sync code (0x9d, 0x01, 0x2a)
+ * 4 bytes:- including image width and height in the lowest 14 bits
+ * of each 2-byte value.
*/
-
si->is_kf = 0;
if (data_sz >= 10 && !(data[0] & 0x01)) /* I-Frame */
@@ -270,14 +269,14 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data,
const uint8_t *c = data + 3;
si->is_kf = 1;
- // vet via sync code
+ /* vet via sync code */
if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a)
res = VPX_CODEC_UNSUP_BITSTREAM;
si->w = swap2(*(const unsigned short *)(c + 3)) & 0x3fff;
si->h = swap2(*(const unsigned short *)(c + 5)) & 0x3fff;
- //printf("w=%d, h=%d\n", si->w, si->h);
+ /*printf("w=%d, h=%d\n", si->w, si->h);*/
if (!(si->h | si->w))
res = VPX_CODEC_UNSUP_BITSTREAM;
}
@@ -529,7 +528,7 @@ static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t *ctx,
done = 1;
- if (ctx->priv->alg_priv)
+ if (!res && ctx->priv->alg_priv)
{
for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++)
{
@@ -654,9 +653,9 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
#ifndef VERSION_STRING
#define VERSION_STRING
#endif
-vpx_codec_iface_t vpx_codec_vp8_dx_algo =
+CODEC_INTERFACE(vpx_codec_vp8_dx) =
{
- "vpx Technologies VP8 Decoder" VERSION_STRING,
+ "WebM Project VP8 Decoder" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
/* vpx_codec_caps_t caps; */
@@ -671,7 +670,14 @@ vpx_codec_iface_t vpx_codec_vp8_dx_algo =
vp8_decode, /* vpx_codec_decode_fn_t decode; */
vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
},
- {NOT_IMPLEMENTED} /* encoder functions */
+ { /* encoder functions */
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED
+ }
};
/*
@@ -679,7 +685,7 @@ vpx_codec_iface_t vpx_codec_vp8_dx_algo =
*/
vpx_codec_iface_t vpx_codec_vp8_algo =
{
- "vpx Technologies VP8 Decoder (Deprecated API)" VERSION_STRING,
+ "WebM Project VP8 Decoder (Deprecated API)" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
/* vpx_codec_caps_t caps; */
@@ -694,5 +700,12 @@ vpx_codec_iface_t vpx_codec_vp8_algo =
vp8_decode, /* vpx_codec_decode_fn_t decode; */
vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
},
- {NOT_IMPLEMENTED} /* encoder functions */
+ { /* encoder functions */
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED,
+ NOT_IMPLEMENTED
+ }
};
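
vp8_peek_si() reads only the fixed, uncompressed head of a key frame, per the corrected comment above: a 3-byte frame tag (low bit clear for a key frame), the 3-byte sync code 0x9d 0x01 0x2a, and two little-endian 16-bit values whose low 14 bits hold width and height. A standalone sketch of that probe (function name hypothetical; the tree's swap2() handles the equivalent byte order concern on big-endian hosts):

    #include <stdint.h>

    static int peek_keyframe_size(const uint8_t *data, unsigned int data_sz,
                                  unsigned int *w, unsigned int *h)
    {
        if (data_sz < 10 || (data[0] & 0x01))   /* short buffer or inter frame */
            return -1;

        if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
            return -1;                          /* bad sync code */

        *w = (data[6] | (data[7] << 8)) & 0x3fff;   /* low 14 bits each */
        *h = (data[8] | (data[9] << 8)) & 0x3fff;

        return (*w | *h) ? 0 : -1;              /* 0x0 size is unsupported */
    }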
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 651ee7767..683d785e6 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -1,14 +1,18 @@
##
-## Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
-## Use of this source code is governed by a BSD-style license and patent
-## grant that can be found in the LICENSE file in the root of the source
-## tree. All contributing project authors may be found in the AUTHORS
-## file in the root of the source tree.
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
##
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_CX_EXPORTS += exports_enc
+
VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
VP8_CX_SRCS-no += $(VP8_COMMON_SRCS-no)
VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
@@ -70,10 +74,16 @@ VP8_CX_SRCS-yes += encoder/quantize.c
VP8_CX_SRCS-yes += encoder/ratectrl.c
VP8_CX_SRCS-yes += encoder/rdopt.c
VP8_CX_SRCS-yes += encoder/sad_c.c
-VP8_CX_SRCS-yes += encoder/ssim.c
+VP8_CX_SRCS-yes += encoder/segmentation.c
+VP8_CX_SRCS-yes += encoder/segmentation.h
+VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c
VP8_CX_SRCS-yes += encoder/tokenize.c
VP8_CX_SRCS-yes += encoder/treewriter.c
VP8_CX_SRCS-yes += encoder/variance_c.c
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.h
ifeq ($(CONFIG_REALTIME_ONLY),yes)
VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
@@ -83,19 +93,24 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
+VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
+VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
+VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index f0753d93e..da27e0897 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -1,10 +1,11 @@
##
-## Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
-## Use of this source code is governed by a BSD-style license and patent
-## grant that can be found in the LICENSE file in the root of the source
-## tree. All contributing project authors may be found in the AUTHORS
-## file in the root of the source tree.
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
##
@@ -12,17 +13,21 @@
#File list for arm
# encoder
-VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/mcomp_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6) += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
#File list for armv6
# encoder
@@ -43,10 +48,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/boolhuff_armv7$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 76368eb53..1acd67453 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -1,14 +1,18 @@
##
-## Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
-## Use of this source code is governed by a BSD-style license and patent
-## grant that can be found in the LICENSE file in the root of the source
-## tree. All contributing project authors may be found in the AUTHORS
-## file in the root of the source tree.
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
##
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_DX_EXPORTS += exports_dec
+
VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no)
VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
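
(The new VP8_DX_EXPORTS += exports_dec line appears to register a decoder symbol-export list with the build; the file itself is not part of this diff, so that reading is inferred. The surrounding context lines fold the shared lists in wholesale, keeping both the built -yes list and the ignored -no list so per-feature decisions made in vp8_common.mk carry over unchanged. A sketch with illustrative file names:)

    VP8_COMMON_SRCS-yes := common/alloccommon.c
    VP8_COMMON_SRCS-no  := common/arm/neon/iwalsh_neon.asm
    # The decoder inherits both lists; its own removals come later.
    VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
    VP8_DX_SRCS-no  += $(VP8_COMMON_SRCS-no)
    all: ; @echo built: $(VP8_DX_SRCS-yes)
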
@@ -26,7 +30,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
# common
#define ARM
#define DISABLE_THREAD
-#define INLINE=__forceinline
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
@@ -40,7 +43,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
# decoder
#define ARM
#define DISABLE_THREAD
-#define INLINE=__forceinline
#INCLUDES += algo/vpx_common/vpx_mem/include
#INCLUDES += common
@@ -52,23 +54,26 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c
VP8_DX_SRCS-yes += decoder/decodframe.c
-VP8_DX_SRCS-yes += decoder/demode.c
VP8_DX_SRCS-yes += decoder/dequantize.c
VP8_DX_SRCS-yes += decoder/detokenize.c
VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c
VP8_DX_SRCS-yes += decoder/dboolhuff.h
VP8_DX_SRCS-yes += decoder/decodemv.h
VP8_DX_SRCS-yes += decoder/decoderthreading.h
-VP8_DX_SRCS-yes += decoder/demode.h
VP8_DX_SRCS-yes += decoder/dequantize.h
VP8_DX_SRCS-yes += decoder/detokenize.h
VP8_DX_SRCS-yes += decoder/onyxd_int.h
VP8_DX_SRCS-yes += decoder/treereader.h
VP8_DX_SRCS-yes += decoder/onyxd_if.c
VP8_DX_SRCS-yes += decoder/threading.c
+VP8_DX_SRCS-yes += decoder/idct_blk.c
+VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
+VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
+VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
+VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c
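
(The x86 lines in this last hunk rely on a concatenation trick: VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) resolves to VP8_DX_SRCS-yes when either architecture is enabled, which only works if, as this tree's configure output appears to arrange, a disabled flag is left empty rather than set to "no". A sketch under that assumption:)

    # 64-bit build: ARCH_X86 is left empty, ARCH_X86_64 is enabled.
    ARCH_X86 :=
    ARCH_X86_64 := yes
    # The composed suffix is "yes", so the file is built.
    VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/idct_blk_sse2.c
    all: ; @echo x86 sources: $(VP8_DX_SRCS-yes)
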
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 1b4a7ecf7..0803a9cb0 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -1,44 +1,32 @@
##
-## Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
-## Use of this source code is governed by a BSD-style license and patent
-## grant that can be found in the LICENSE file in the root of the source
-## tree. All contributing project authors may be found in the AUTHORS
-## file in the root of the source tree.
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
##
#VP8_DX_SRCS list is modified according to different platforms.
-#File list for arm
-# decoder
-#VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/decodframe_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dsystemdependent.c
+VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c

-#VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/decodframe.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/generic/dsystemdependent.c
+VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
+VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
#File list for armv6
-# decoder
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantdcidct_v6$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantidct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_idct_v6$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantize_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/idct_blk_v6.c
#File list for neon
-# decoder
-VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantdcidct_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantidct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_full_2x_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_0_2x_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequant_idct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM)
VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantizeb_neon$(ASM)
-
-
-#for new token test
-ifeq ($(ARCH_ARM),yes)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm_sjl.c
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm_v6$(ASM)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/onyxd_if_sjl.c
-
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm.c
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS) += decoder/onyxd_if.c
-endif
+VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_blk_neon.c
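
(This final hunk also retires the ifeq-guarded CONFIG_NEW_TOKENS block in favor of the flag-in-the-variable-name form, with the hand-written detokenizer now gated by its own CONFIG_ARM_ASM_DETOK switch earlier in the file. The two conditional styles are equivalent; a sketch showing both side by side, assuming ARCH_ARM=yes, with SRCS_OLD and SRCS_NEW as purely illustrative list names:)

    ARCH_ARM := yes
    # Old style: an explicit conditional block.
    ifeq ($(ARCH_ARM),yes)
    SRCS_OLD += decoder/arm/arm_dsystemdependent.c
    endif
    # New style: the same gate, folded into the variable name.
    SRCS_NEW-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
    all: ; @echo old=$(SRCS_OLD) new=$(SRCS_NEW-yes)
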