diff options
-rw-r--r-- | build/make/Android.mk | 6 | ||||
-rw-r--r-- | test/test_libvpx.cc | 16 | ||||
-rw-r--r-- | third_party/x86inc/README.libvpx | 1 | ||||
-rw-r--r-- | third_party/x86inc/x86inc.asm | 2 | ||||
-rw-r--r-- | vp8/common/rtcd.c | 6 | ||||
-rw-r--r-- | vp8/vp8_cx_iface.c | 4 | ||||
-rw-r--r-- | vp8/vp8_dx_iface.c | 4 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd.c | 3 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 42 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 1 | ||||
-rw-r--r-- | vp9/vp9_dx_iface.c | 4 | ||||
-rw-r--r-- | vpx_ports/vpx_once.h | 2 | ||||
-rw-r--r-- | vpx_scale/vpx_scale_rtcd.c | 4 | ||||
-rw-r--r-- | vpx_scale/win32/scaleopt.c | 525 |
17 files changed, 64 insertions, 564 deletions
diff --git a/build/make/Android.mk b/build/make/Android.mk index d897b44cc..3d3f57ddc 100644 --- a/build/make/Android.mk +++ b/build/make/Android.mk @@ -184,7 +184,11 @@ clean: @$(RM) -r $(ASM_CNV_PATH) @$(RM) $(CLEAN-OBJS) -include $(BUILD_SHARED_LIBRARY) +ifeq ($(ENABLE_SHARED),1) + include $(BUILD_SHARED_LIBRARY) +else + include $(BUILD_STATIC_LIBRARY) +endif ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) $(call import-module,cpufeatures) diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc index fc035af8f..dcf5fc52b 100644 --- a/test/test_libvpx.cc +++ b/test/test_libvpx.cc @@ -14,11 +14,12 @@ #endif extern "C" { #if CONFIG_VP8 -extern void vp8_rtcd(); -#endif +#include "./vp8_rtcd.h" +#endif // CONFIG_VP8 #if CONFIG_VP9 -extern void vp9_rtcd(); -#endif +#include "./vp9_rtcd.h" +#endif // CONFIG_VP9 +#include "./vpx_scale_rtcd.h" } #include "third_party/googletest/src/include/gtest/gtest.h" @@ -59,11 +60,12 @@ int main(int argc, char **argv) { #if CONFIG_VP8 vp8_rtcd(); -#endif +#endif // CONFIG_VP8 #if CONFIG_VP9 vp9_rtcd(); -#endif -#endif +#endif // CONFIG_VP9 + vpx_scale_rtcd(); +#endif // !CONFIG_SHARED return RUN_ALL_TESTS(); } diff --git a/third_party/x86inc/README.libvpx b/third_party/x86inc/README.libvpx index 02cd9ab4e..343bcf962 100644 --- a/third_party/x86inc/README.libvpx +++ b/third_party/x86inc/README.libvpx @@ -9,3 +9,4 @@ defines that help automatically allow assembly to work cross-platform. Local Modifications: Some modifications to allow PIC to work with x86inc. +Conditionally define program_name to allow overriding. diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm index 9273fc97e..bc8116995 100644 --- a/third_party/x86inc/x86inc.asm +++ b/third_party/x86inc/x86inc.asm @@ -36,7 +36,9 @@ %include "vpx_config.asm" +%ifndef program_name %define program_name vp9 +%endif %define UNIX64 0 diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c index 0b371b094..ab0e9b47f 100644 --- a/vp8/common/rtcd.c +++ b/vp8/common/rtcd.c @@ -7,15 +7,13 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_config.h" +#include "./vpx_config.h" #define RTCD_C -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" #include "vpx_ports/vpx_once.h" -extern void vpx_scale_rtcd(void); void vp8_rtcd() { - vpx_scale_rtcd(); once(setup_rtcd_internal); } diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index bf8a8537c..a5dfd07f9 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -10,7 +10,8 @@ #include "./vpx_config.h" -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" @@ -649,6 +650,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, vp8_rtcd(); + vpx_scale_rtcd(); if (!ctx->priv) { diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 67a0fef64..e0eb30aa5 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -11,7 +11,8 @@ #include <stdlib.h> #include <string.h> -#include "vp8_rtcd.h" +#include "./vp8_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" @@ -106,6 +107,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, (void) data; vp8_rtcd(); + vpx_scale_rtcd(); /* This function only allocates space for the vpx_codec_alg_priv_t * structure. More memory may be required at the time the stream diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c index c777bc81f..2dfa09f50 100644 --- a/vp9/common/vp9_rtcd.c +++ b/vp9/common/vp9_rtcd.c @@ -12,10 +12,7 @@ #include "./vp9_rtcd.h" #include "vpx_ports/vpx_once.h" -void vpx_scale_rtcd(void); - void vp9_rtcd() { - vpx_scale_rtcd(); // TODO(JBB): Remove this once, by insuring that both the encoder and // decoder setup functions are protected by once(); once(setup_rtcd_internal); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 358f22a8d..54802226b 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -12,6 +12,7 @@ #include <limits.h> #include <stdio.h> +#include "./vp9_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" @@ -39,6 +40,7 @@ static void initialize_dec(void) { if (!init_done) { vp9_rtcd(); + vpx_scale_rtcd(); vp9_init_intra_predictors(); init_done = 1; } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 367ab3c36..e59d2c2a0 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -458,8 +458,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, return 0; } - -void vp9_set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { +void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) { SPEED_FEATURES *const sf = &cpi->sf; if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { @@ -480,25 +479,41 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { // Array index: 0 - threshold_64x64; 1 - threshold_32x32; // 2 - threshold_16x16; 3 - vbp_threshold_8x8; if (is_key_frame) { - thresholds[0] = threshold_base; - thresholds[1] = threshold_base >> 2; - thresholds[2] = threshold_base >> 2; - thresholds[3] = threshold_base << 2; + cpi->vbp_thresholds[0] = threshold_base; + cpi->vbp_thresholds[1] = threshold_base >> 2; + cpi->vbp_thresholds[2] = threshold_base >> 2; + cpi->vbp_thresholds[3] = threshold_base << 2; cpi->vbp_bsize_min = BLOCK_8X8; } else { - thresholds[1] = threshold_base; + cpi->vbp_thresholds[1] = threshold_base; if (cm->width <= 352 && cm->height <= 288) { - thresholds[0] = threshold_base >> 2; - thresholds[2] = threshold_base << 3; + cpi->vbp_thresholds[0] = threshold_base >> 2; + cpi->vbp_thresholds[2] = threshold_base << 3; } else { - thresholds[0] = threshold_base; - thresholds[2] = threshold_base << cpi->oxcf.speed; + cpi->vbp_thresholds[0] = threshold_base; + cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed; } cpi->vbp_bsize_min = BLOCK_16X16; } } } +static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { + VP9_COMMON *const cm = &cpi->common; + const int64_t threshold_base = (int64_t)(cpi->y_dequant[q][1]); + + // Array index: 0 - threshold_64x64; 1 - threshold_32x32; + // 2 - threshold_16x16; 3 - vbp_threshold_8x8; + thresholds[1] = threshold_base; + if (cm->width <= 352 && cm->height <= 288) { + thresholds[0] = threshold_base >> 2; + thresholds[2] = threshold_base << 3; + } else { + thresholds[0] = threshold_base; + thresholds[2] = threshold_base << cpi->oxcf.speed; + } +} + static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, int dp, int x8_idx, int y8_idx, v8x8 *vst, #if CONFIG_VP9_HIGHBITDEPTH @@ -611,7 +626,7 @@ static void choose_partitioning(VP9_COMP *cpi, if (cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - vp9_set_vbp_thresholds(cpi, thresholds, q); + modify_vbp_thresholds(cpi, thresholds, q); } } @@ -3854,6 +3869,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { } vp9_zero(x->zcoeff_blk); + if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0) + cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); + if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); } diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 102713029..1acde0283 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -40,7 +40,7 @@ void vp9_init_tile_data(struct VP9_COMP *cpi); void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); -void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int64_t thresholds[], int q); +void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 82f99b3e5..8a7ae8ed0 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -13,6 +13,7 @@ #include <limits.h> #include "./vpx_config.h" +#include "./vp9_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/internal/vpx_psnr.h" #include "vpx_ports/vpx_timer.h" @@ -293,6 +294,7 @@ void vp9_initialize_enc(void) { if (!init_done) { vp9_rtcd(); + vpx_scale_rtcd(); vp9_init_intra_predictors(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); @@ -2958,7 +2960,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) { set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); vp9_set_quantizer(cm, q); - vp9_set_vbp_thresholds(cpi, cpi->vbp_thresholds, q); + vp9_set_vbp_thresholds(cpi, q); setup_frame(cpi); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 0ad3249b5..416f6797b 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1125,7 +1125,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif if (cpi->rc.frames_since_golden == 0) { - cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); usable_ref_frame = LAST_FRAME; } else { usable_ref_frame = GOLDEN_FRAME; diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index c2f782bab..e9c58cc38 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -462,7 +462,6 @@ static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { - vp9_ppflags_t flags = {0, 0, 0}; const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); (void)deadline; @@ -547,9 +546,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, winterface->launch(worker); } - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); - return VPX_CODEC_OK; } diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h index bd9eebd64..f1df39434 100644 --- a/vpx_ports/vpx_once.h +++ b/vpx_ports/vpx_once.h @@ -110,7 +110,7 @@ static void once(void (*func)(void)) #else -/* No-op version that performs no synchronization. vp8_rtcd() is idempotent, +/* No-op version that performs no synchronization. *_rtcd() is idempotent, * so as long as your platform provides atomic loads/stores of pointers * no synchronization is strictly necessary. */ diff --git a/vpx_scale/vpx_scale_rtcd.c b/vpx_scale/vpx_scale_rtcd.c index 656a22f52..bea603fd1 100644 --- a/vpx_scale/vpx_scale_rtcd.c +++ b/vpx_scale/vpx_scale_rtcd.c @@ -7,9 +7,9 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_config.h" +#include "./vpx_config.h" #define RTCD_C -#include "vpx_scale_rtcd.h" +#include "./vpx_scale_rtcd.h" #include "vpx_ports/vpx_once.h" void vpx_scale_rtcd() diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c deleted file mode 100644 index 4336ecea3..000000000 --- a/vpx_scale/win32/scaleopt.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : scaleopt.cpp -* -* Description : Optimized scaling functions -* -****************************************************************************/ -#include "pragmas.h" - -/**************************************************************************** -* Module Statics -****************************************************************************/ -__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; - -#include "vpx_scale/vpx_scale.h" -#include "vpx_mem/vpx_mem.h" - -__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 }; -__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 }; - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_5_4_scale_mmx - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 4 to 5. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_5_4_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for ( i=0; i<source_width; i+=5 ) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; - - des[0] = a; - des[1] = ((b*192 + c* 64 + 128)>>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); - - src += 5; - des += 4; - } - */ - (void) dest_width; - - __asm { - - mov esi, source; - mov edi, dest; - - mov ecx, source_width; - movq mm5, const54_1; - - pxor mm7, mm7; - movq mm6, const54_2; - - movq mm4, round_values; - lea edx, [esi+ecx]; - horizontal_line_5_4_loop: - - movq mm0, QWORD PTR [esi]; - 00 01 02 03 04 05 06 07 - movq mm1, mm0; - 00 01 02 03 04 05 06 07 - - psrlq mm0, 8; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7; - xx 00 xx 01 xx 02 xx 03 - - punpcklbw mm0, mm7; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 - - pmullw mm0, mm6 - add esi, 5 - - add edi, 4 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp esi, edx - packuswb mm1, mm7 - - movd DWORD PTR [edi-4], mm1 - - jl horizontal_line_5_4_loop - - } - -} -__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; -__declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, 128, 128 }; -__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; - -static -void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - - __asm { - push ebx - - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - mov ebx, dest_width - - vs_5_4_loop: - - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; - - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - movq mm3, mm2 - pmullw mm1, three_fourths - - pmullw mm2, one_fourths - movd mm4, [eax+ecx] - - pmullw mm3, two_fourths - punpcklbw mm4, mm7 - - movq mm5, mm4 - pmullw mm4, two_fourths - - paddw mm1, mm2 - movd mm6, [eax+ecx*2] - - pmullw mm5, one_fourths - paddw mm1, round_values; - - paddw mm3, mm4 - psrlw mm1, 8 - - punpcklbw mm6, mm7 - paddw mm3, round_values - - pmullw mm6, three_fourths - psrlw mm3, 8 - - packuswb mm1, mm7 - packuswb mm3, mm7 - - movd DWORD PTR [edi], mm0 - movd DWORD PTR [edi+edx], mm1 - - - paddw mm5, mm6 - movd DWORD PTR [edi+edx*2], mm3 - - lea eax, [edi+edx*2] - paddw mm5, round_values - - psrlw mm5, 8 - add edi, 4 - - packuswb mm5, mm7 - movd DWORD PTR [eax+edx], mm5 - - add esi, 4 - sub ebx, 4 - - jg vs_5_4_loop - - pop ebx - } -} - - -__declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171, 0 }; -__declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, 0 }; - - -static -void horizontal_line_5_3_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - - (void) dest_width; - __asm { - - mov esi, source; - mov edi, dest; - - mov ecx, source_width; - movq mm5, const53_1; - - pxor mm7, mm7; - movq mm6, const53_2; - - movq mm4, round_values; - lea edx, [esi+ecx-5]; - horizontal_line_5_3_loop: - - movq mm0, QWORD PTR [esi]; - 00 01 02 03 04 05 06 07 - movq mm1, mm0; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - add esi, 5 - - add edi, 3 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp esi, edx - packuswb mm1, mm7 - - movd DWORD PTR [edi-3], mm1 - jl horizontal_line_5_3_loop - -// exit condition - movq mm0, QWORD PTR [esi]; - 00 01 02 03 04 05 06 07 - movq mm1, mm0; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - packuswb mm1, mm7 - movd eax, mm1 - - mov edx, eax - shr edx, 16 - - mov WORD PTR[edi], ax - mov BYTE PTR[edi+2], dl - - } - -} - -__declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85, 85 }; -__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; - -static -void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - - __asm { - push ebx - - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds - - movq mm6, two_thirds - mov ebx, dest_width; - - vs_5_3_loop: - - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; - - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - pmullw mm1, mm5 - pmullw mm2, mm6 - - movd mm3, DWORD ptr [eax+ecx] - movd mm4, DWORD ptr [eax+ecx*2] - - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 - - pmullw mm3, mm6 - pmullw mm4, mm5 - - - movd DWORD PTR [edi], mm0 - paddw mm1, mm2 - - paddw mm1, round_values - psrlw mm1, 8 - - packuswb mm1, mm7 - paddw mm3, mm4 - - paddw mm3, round_values - movd DWORD PTR [edi+edx], mm1 - - psrlw mm3, 8 - packuswb mm3, mm7 - - movd DWORD PTR [edi+edx*2], mm3 - - - add edi, 4 - add esi, 4 - - sub ebx, 4 - jg vs_5_3_loop - - pop ebx - } -} - - - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_2_1_scale - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_2_1_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - (void) dest_width; - (void) source_width; - __asm { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor edx, edx - hs_2_1_loop: - - movq mm0, [esi+edx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [edi+edx], mm0; - add edx, 4 - - cmp edx, ecx - jl hs_2_1_loop - - } -} - - - -static -void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); -} - - -__declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 48, 48, 48 }; -__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; - -static -void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - - (void) dest_pitch; - __asm { - mov esi, source - mov edi, dest - - mov eax, src_pitch - mov edx, dest_width - - pxor mm7, mm7 - sub esi, eax // back one line - - - lea ecx, [esi+edx]; - movq mm6, round_values; - - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; - - vs_2_1_i_loop: - movd mm0, [esi] // - movd mm1, [esi+eax] // - - movd mm2, [esi+eax*2] // - punpcklbw mm0, mm7 - - pmullw mm0, mm5 - punpcklbw mm1, mm7 - - pmullw mm1, mm4 - punpcklbw mm2, mm7 - - pmullw mm2, mm5 - paddw mm0, round_values - - paddw mm1, mm2 - paddw mm0, mm1 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD PTR [edi], mm0 - add esi, 4 - - add edi, 4; - cmp esi, ecx - jl vs_2_1_i_loop - - } -} - - - -void -register_mmxscalers(void) { - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; -} |