summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build/make/Android.mk6
-rw-r--r--test/test_libvpx.cc16
-rw-r--r--third_party/x86inc/README.libvpx1
-rw-r--r--third_party/x86inc/x86inc.asm2
-rw-r--r--vp8/common/rtcd.c6
-rw-r--r--vp8/vp8_cx_iface.c4
-rw-r--r--vp8/vp8_dx_iface.c4
-rw-r--r--vp9/common/vp9_rtcd.c3
-rw-r--r--vp9/decoder/vp9_decoder.c2
-rw-r--r--vp9/encoder/vp9_encodeframe.c42
-rw-r--r--vp9/encoder/vp9_encodeframe.h2
-rw-r--r--vp9/encoder/vp9_encoder.c4
-rw-r--r--vp9/encoder/vp9_pickmode.c1
-rw-r--r--vp9/vp9_dx_iface.c4
-rw-r--r--vpx_ports/vpx_once.h2
-rw-r--r--vpx_scale/vpx_scale_rtcd.c4
-rw-r--r--vpx_scale/win32/scaleopt.c525
17 files changed, 64 insertions, 564 deletions
diff --git a/build/make/Android.mk b/build/make/Android.mk
index d897b44cc..3d3f57ddc 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -184,7 +184,11 @@ clean:
@$(RM) -r $(ASM_CNV_PATH)
@$(RM) $(CLEAN-OBJS)
-include $(BUILD_SHARED_LIBRARY)
+ifeq ($(ENABLE_SHARED),1)
+ include $(BUILD_SHARED_LIBRARY)
+else
+ include $(BUILD_STATIC_LIBRARY)
+endif
ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
$(call import-module,cpufeatures)
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index fc035af8f..dcf5fc52b 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -14,11 +14,12 @@
#endif
extern "C" {
#if CONFIG_VP8
-extern void vp8_rtcd();
-#endif
+#include "./vp8_rtcd.h"
+#endif // CONFIG_VP8
#if CONFIG_VP9
-extern void vp9_rtcd();
-#endif
+#include "./vp9_rtcd.h"
+#endif // CONFIG_VP9
+#include "./vpx_scale_rtcd.h"
}
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -59,11 +60,12 @@ int main(int argc, char **argv) {
#if CONFIG_VP8
vp8_rtcd();
-#endif
+#endif // CONFIG_VP8
#if CONFIG_VP9
vp9_rtcd();
-#endif
-#endif
+#endif // CONFIG_VP9
+ vpx_scale_rtcd();
+#endif // !CONFIG_SHARED
return RUN_ALL_TESTS();
}
diff --git a/third_party/x86inc/README.libvpx b/third_party/x86inc/README.libvpx
index 02cd9ab4e..343bcf962 100644
--- a/third_party/x86inc/README.libvpx
+++ b/third_party/x86inc/README.libvpx
@@ -9,3 +9,4 @@ defines that help automatically allow assembly to work cross-platform.
Local Modifications:
Some modifications to allow PIC to work with x86inc.
+Conditionally define program_name to allow overriding.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index 9273fc97e..bc8116995 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -36,7 +36,9 @@
%include "vpx_config.asm"
+%ifndef program_name
%define program_name vp9
+%endif
%define UNIX64 0
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
index 0b371b094..ab0e9b47f 100644
--- a/vp8/common/rtcd.c
+++ b/vp8/common/rtcd.c
@@ -7,15 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#define RTCD_C
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"
-extern void vpx_scale_rtcd(void);
void vp8_rtcd()
{
- vpx_scale_rtcd();
once(setup_rtcd_internal);
}
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index bf8a8537c..a5dfd07f9 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -10,7 +10,8 @@
#include "./vpx_config.h"
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_codec.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
@@ -649,6 +650,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
vp8_rtcd();
+ vpx_scale_rtcd();
if (!ctx->priv)
{
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 67a0fef64..e0eb30aa5 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -11,7 +11,8 @@
#include <stdlib.h>
#include <string.h>
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -106,6 +107,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
(void) data;
vp8_rtcd();
+ vpx_scale_rtcd();
/* This function only allocates space for the vpx_codec_alg_priv_t
* structure. More memory may be required at the time the stream
diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c
index c777bc81f..2dfa09f50 100644
--- a/vp9/common/vp9_rtcd.c
+++ b/vp9/common/vp9_rtcd.c
@@ -12,10 +12,7 @@
#include "./vp9_rtcd.h"
#include "vpx_ports/vpx_once.h"
-void vpx_scale_rtcd(void);
-
void vp9_rtcd() {
- vpx_scale_rtcd();
// TODO(JBB): Remove this once, by insuring that both the encoder and
// decoder setup functions are protected by once();
once(setup_rtcd_internal);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 358f22a8d..54802226b 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
+#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
@@ -39,6 +40,7 @@ static void initialize_dec(void) {
if (!init_done) {
vp9_rtcd();
+ vpx_scale_rtcd();
vp9_init_intra_predictors();
init_done = 1;
}
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 367ab3c36..e59d2c2a0 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -458,8 +458,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
return 0;
}
-
-void vp9_set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
SPEED_FEATURES *const sf = &cpi->sf;
if (sf->partition_search_type != VAR_BASED_PARTITION &&
sf->partition_search_type != REFERENCE_PARTITION) {
@@ -480,25 +479,41 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
// Array index: 0 - threshold_64x64; 1 - threshold_32x32;
// 2 - threshold_16x16; 3 - vbp_threshold_8x8;
if (is_key_frame) {
- thresholds[0] = threshold_base;
- thresholds[1] = threshold_base >> 2;
- thresholds[2] = threshold_base >> 2;
- thresholds[3] = threshold_base << 2;
+ cpi->vbp_thresholds[0] = threshold_base;
+ cpi->vbp_thresholds[1] = threshold_base >> 2;
+ cpi->vbp_thresholds[2] = threshold_base >> 2;
+ cpi->vbp_thresholds[3] = threshold_base << 2;
cpi->vbp_bsize_min = BLOCK_8X8;
} else {
- thresholds[1] = threshold_base;
+ cpi->vbp_thresholds[1] = threshold_base;
if (cm->width <= 352 && cm->height <= 288) {
- thresholds[0] = threshold_base >> 2;
- thresholds[2] = threshold_base << 3;
+ cpi->vbp_thresholds[0] = threshold_base >> 2;
+ cpi->vbp_thresholds[2] = threshold_base << 3;
} else {
- thresholds[0] = threshold_base;
- thresholds[2] = threshold_base << cpi->oxcf.speed;
+ cpi->vbp_thresholds[0] = threshold_base;
+ cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed;
}
cpi->vbp_bsize_min = BLOCK_16X16;
}
}
}
+static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int64_t threshold_base = (int64_t)(cpi->y_dequant[q][1]);
+
+ // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
+ // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
+ thresholds[1] = threshold_base;
+ if (cm->width <= 352 && cm->height <= 288) {
+ thresholds[0] = threshold_base >> 2;
+ thresholds[2] = threshold_base << 3;
+ } else {
+ thresholds[0] = threshold_base;
+ thresholds[2] = threshold_base << cpi->oxcf.speed;
+ }
+}
+
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
@@ -611,7 +626,7 @@ static void choose_partitioning(VP9_COMP *cpi,
if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- vp9_set_vbp_thresholds(cpi, thresholds, q);
+ modify_vbp_thresholds(cpi, thresholds, q);
}
}
@@ -3854,6 +3869,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
vp9_zero(x->zcoeff_blk);
+ if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0)
+ cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
+
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
}
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 102713029..1acde0283 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -40,7 +40,7 @@ void vp9_init_tile_data(struct VP9_COMP *cpi);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col);
-void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int64_t thresholds[], int q);
+void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 82f99b3e5..8a7ae8ed0 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -13,6 +13,7 @@
#include <limits.h>
#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/internal/vpx_psnr.h"
#include "vpx_ports/vpx_timer.h"
@@ -293,6 +294,7 @@ void vp9_initialize_enc(void) {
if (!init_done) {
vp9_rtcd();
+ vpx_scale_rtcd();
vp9_init_intra_predictors();
vp9_init_me_luts();
vp9_rc_init_minq_luts();
@@ -2958,7 +2960,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) {
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
vp9_set_quantizer(cm, q);
- vp9_set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+ vp9_set_vbp_thresholds(cpi, q);
setup_frame(cpi);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 0ad3249b5..416f6797b 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1125,7 +1125,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
if (cpi->rc.frames_since_golden == 0) {
- cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
usable_ref_frame = LAST_FRAME;
} else {
usable_ref_frame = GOLDEN_FRAME;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c2f782bab..e9c58cc38 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -462,7 +462,6 @@ static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx,
static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
const uint8_t **data, unsigned int data_sz,
void *user_priv, int64_t deadline) {
- vp9_ppflags_t flags = {0, 0, 0};
const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
(void)deadline;
@@ -547,9 +546,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
winterface->launch(worker);
}
- if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
- set_ppflags(ctx, &flags);
-
return VPX_CODEC_OK;
}
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
index bd9eebd64..f1df39434 100644
--- a/vpx_ports/vpx_once.h
+++ b/vpx_ports/vpx_once.h
@@ -110,7 +110,7 @@ static void once(void (*func)(void))
#else
-/* No-op version that performs no synchronization. vp8_rtcd() is idempotent,
+/* No-op version that performs no synchronization. *_rtcd() is idempotent,
* so as long as your platform provides atomic loads/stores of pointers
* no synchronization is strictly necessary.
*/
diff --git a/vpx_scale/vpx_scale_rtcd.c b/vpx_scale/vpx_scale_rtcd.c
index 656a22f52..bea603fd1 100644
--- a/vpx_scale/vpx_scale_rtcd.c
+++ b/vpx_scale/vpx_scale_rtcd.c
@@ -7,9 +7,9 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#define RTCD_C
-#include "vpx_scale_rtcd.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx_ports/vpx_once.h"
void vpx_scale_rtcd()
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
deleted file mode 100644
index 4336ecea3..000000000
--- a/vpx_scale/win32/scaleopt.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-* Module Title : scaleopt.cpp
-*
-* Description : Optimized scaling functions
-*
-****************************************************************************/
-#include "pragmas.h"
-
-/****************************************************************************
-* Module Statics
-****************************************************************************/
-__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 };
-
-#include "vpx_scale/vpx_scale.h"
-#include "vpx_mem/vpx_mem.h"
-
-__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 };
-__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 };
-
-
-/****************************************************************************
- *
- * ROUTINE : horizontal_line_5_4_scale_mmx
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 4 to 5.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_5_4_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- /*
- unsigned i;
- unsigned int a, b, c, d, e;
- unsigned char *des = dest;
- const unsigned char *src = source;
-
- (void) dest_width;
-
- for ( i=0; i<source_width; i+=5 )
- {
- a = src[0];
- b = src[1];
- c = src[2];
- d = src[3];
- e = src[4];
-
- des[0] = a;
- des[1] = ((b*192 + c* 64 + 128)>>8);
- des[2] = ((c*128 + d*128 + 128)>>8);
- des[3] = ((d* 64 + e*192 + 128)>>8);
-
- src += 5;
- des += 4;
- }
- */
- (void) dest_width;
-
- __asm {
-
- mov esi, source;
- mov edi, dest;
-
- mov ecx, source_width;
- movq mm5, const54_1;
-
- pxor mm7, mm7;
- movq mm6, const54_2;
-
- movq mm4, round_values;
- lea edx, [esi+ecx];
- horizontal_line_5_4_loop:
-
- movq mm0, QWORD PTR [esi];
- 00 01 02 03 04 05 06 07
- movq mm1, mm0;
- 00 01 02 03 04 05 06 07
-
- psrlq mm0, 8;
- 01 02 03 04 05 06 07 xx
- punpcklbw mm1, mm7;
- xx 00 xx 01 xx 02 xx 03
-
- punpcklbw mm0, mm7;
- xx 01 xx 02 xx 03 xx 04
- pmullw mm1, mm5
-
- pmullw mm0, mm6
- add esi, 5
-
- add edi, 4
- paddw mm1, mm0
-
- paddw mm1, mm4
- psrlw mm1, 8
-
- cmp esi, edx
- packuswb mm1, mm7
-
- movd DWORD PTR [edi-4], mm1
-
- jl horizontal_line_5_4_loop
-
- }
-
-}
-__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 };
-__declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, 128, 128 };
-__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 };
-
-static
-void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
- __asm {
- push ebx
-
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
-
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
-
- mov edx, dest_pitch // Loop counter
- mov ebx, dest_width
-
- vs_5_4_loop:
-
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
-
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
-
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
-
- movq mm3, mm2
- pmullw mm1, three_fourths
-
- pmullw mm2, one_fourths
- movd mm4, [eax+ecx]
-
- pmullw mm3, two_fourths
- punpcklbw mm4, mm7
-
- movq mm5, mm4
- pmullw mm4, two_fourths
-
- paddw mm1, mm2
- movd mm6, [eax+ecx*2]
-
- pmullw mm5, one_fourths
- paddw mm1, round_values;
-
- paddw mm3, mm4
- psrlw mm1, 8
-
- punpcklbw mm6, mm7
- paddw mm3, round_values
-
- pmullw mm6, three_fourths
- psrlw mm3, 8
-
- packuswb mm1, mm7
- packuswb mm3, mm7
-
- movd DWORD PTR [edi], mm0
- movd DWORD PTR [edi+edx], mm1
-
-
- paddw mm5, mm6
- movd DWORD PTR [edi+edx*2], mm3
-
- lea eax, [edi+edx*2]
- paddw mm5, round_values
-
- psrlw mm5, 8
- add edi, 4
-
- packuswb mm5, mm7
- movd DWORD PTR [eax+edx], mm5
-
- add esi, 4
- sub ebx, 4
-
- jg vs_5_4_loop
-
- pop ebx
- }
-}
-
-
-__declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171, 0 };
-__declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, 0 };
-
-
-static
-void horizontal_line_5_3_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
-
- (void) dest_width;
- __asm {
-
- mov esi, source;
- mov edi, dest;
-
- mov ecx, source_width;
- movq mm5, const53_1;
-
- pxor mm7, mm7;
- movq mm6, const53_2;
-
- movq mm4, round_values;
- lea edx, [esi+ecx-5];
- horizontal_line_5_3_loop:
-
- movq mm0, QWORD PTR [esi];
- 00 01 02 03 04 05 06 07
- movq mm1, mm0;
- 00 01 02 03 04 05 06 07
-
- psllw mm0, 8;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8;
- 01 xx 03 xx 05 xx 07 xx
-
- psrlw mm0, 8;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16;
- xx xx 01 xx 03 xx 05 xx
-
- pmullw mm0, mm6
-
- pmullw mm1, mm5
- add esi, 5
-
- add edi, 3
- paddw mm1, mm0
-
- paddw mm1, mm4
- psrlw mm1, 8
-
- cmp esi, edx
- packuswb mm1, mm7
-
- movd DWORD PTR [edi-3], mm1
- jl horizontal_line_5_3_loop
-
-// exit condition
- movq mm0, QWORD PTR [esi];
- 00 01 02 03 04 05 06 07
- movq mm1, mm0;
- 00 01 02 03 04 05 06 07
-
- psllw mm0, 8;
- xx 00 xx 02 xx 04 xx 06
- psrlw mm1, 8;
- 01 xx 03 xx 05 xx 07 xx
-
- psrlw mm0, 8;
- 00 xx 02 xx 04 xx 06 xx
- psllq mm1, 16;
- xx xx 01 xx 03 xx 05 xx
-
- pmullw mm0, mm6
-
- pmullw mm1, mm5
- paddw mm1, mm0
-
- paddw mm1, mm4
- psrlw mm1, 8
-
- packuswb mm1, mm7
- movd eax, mm1
-
- mov edx, eax
- shr edx, 16
-
- mov WORD PTR[edi], ax
- mov BYTE PTR[edi+2], dl
-
- }
-
-}
-
-__declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85, 85 };
-__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 };
-
-static
-void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
- __asm {
- push ebx
-
- mov esi, source // Get the source and destination pointer
- mov ecx, src_pitch // Get the pitch size
-
- mov edi, dest // tow lines below
- pxor mm7, mm7 // clear out mm7
-
- mov edx, dest_pitch // Loop counter
- movq mm5, one_thirds
-
- movq mm6, two_thirds
- mov ebx, dest_width;
-
- vs_5_3_loop:
-
- movd mm0, DWORD ptr [esi] // src[0];
- movd mm1, DWORD ptr [esi+ecx] // src[1];
-
- movd mm2, DWORD ptr [esi+ecx*2]
- lea eax, [esi+ecx*2] //
-
- punpcklbw mm1, mm7
- punpcklbw mm2, mm7
-
- pmullw mm1, mm5
- pmullw mm2, mm6
-
- movd mm3, DWORD ptr [eax+ecx]
- movd mm4, DWORD ptr [eax+ecx*2]
-
- punpcklbw mm3, mm7
- punpcklbw mm4, mm7
-
- pmullw mm3, mm6
- pmullw mm4, mm5
-
-
- movd DWORD PTR [edi], mm0
- paddw mm1, mm2
-
- paddw mm1, round_values
- psrlw mm1, 8
-
- packuswb mm1, mm7
- paddw mm3, mm4
-
- paddw mm3, round_values
- movd DWORD PTR [edi+edx], mm1
-
- psrlw mm3, 8
- packuswb mm3, mm7
-
- movd DWORD PTR [edi+edx*2], mm3
-
-
- add edi, 4
- add esi, 4
-
- sub ebx, 4
- jg vs_5_3_loop
-
- pop ebx
- }
-}
-
-
-
-
-/****************************************************************************
- *
- * ROUTINE : horizontal_line_2_1_scale
- *
- * INPUTS : const unsigned char *source :
- * unsigned int source_width :
- * unsigned char *dest :
- * unsigned int dest_width :
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static
-void horizontal_line_2_1_scale_mmx
-(
- const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width
-) {
- (void) dest_width;
- (void) source_width;
- __asm {
- mov esi, source
- mov edi, dest
-
- pxor mm7, mm7
- mov ecx, dest_width
-
- xor edx, edx
- hs_2_1_loop:
-
- movq mm0, [esi+edx*2]
- psllw mm0, 8
-
- psrlw mm0, 8
- packuswb mm0, mm7
-
- movd DWORD Ptr [edi+edx], mm0;
- add edx, 4
-
- cmp edx, ecx
- jl hs_2_1_loop
-
- }
-}
-
-
-
-static
-void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
- (void) dest_pitch;
- (void) src_pitch;
- vpx_memcpy(dest, source, dest_width);
-}
-
-
-__declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 48, 48, 48 };
-__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 };
-
-static
-void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) {
-
- (void) dest_pitch;
- __asm {
- mov esi, source
- mov edi, dest
-
- mov eax, src_pitch
- mov edx, dest_width
-
- pxor mm7, mm7
- sub esi, eax // back one line
-
-
- lea ecx, [esi+edx];
- movq mm6, round_values;
-
- movq mm5, three_sixteenths;
- movq mm4, ten_sixteenths;
-
- vs_2_1_i_loop:
- movd mm0, [esi] //
- movd mm1, [esi+eax] //
-
- movd mm2, [esi+eax*2] //
- punpcklbw mm0, mm7
-
- pmullw mm0, mm5
- punpcklbw mm1, mm7
-
- pmullw mm1, mm4
- punpcklbw mm2, mm7
-
- pmullw mm2, mm5
- paddw mm0, round_values
-
- paddw mm1, mm2
- paddw mm0, mm1
-
- psrlw mm0, 8
- packuswb mm0, mm7
-
- movd DWORD PTR [edi], mm0
- add esi, 4
-
- add edi, 4;
- cmp esi, ecx
- jl vs_2_1_i_loop
-
- }
-}
-
-
-
-void
-register_mmxscalers(void) {
- vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx;
- vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx;
- vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx;
- vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx;
- vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx;
- vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx;
- vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx;
-}