summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README 6
-rwxr-xr-x build/make/ads2gas.pl 7
-rwxr-xr-x build/make/configure.sh 47
-rwxr-xr-x configure 8
-rw-r--r-- examples/postproc.txt 2
-rw-r--r-- vp8/common/alloccommon.c 4
-rw-r--r-- vp8/common/arm/arm_systemdependent.c 1
-rw-r--r-- vp8/common/arm/dequantize_arm.c 6
-rw-r--r-- vp8/common/blockd.h 7
-rw-r--r-- vp8/common/dequantize.c 3
-rw-r--r-- vp8/common/dequantize.h 2
-rw-r--r-- vp8/common/g_common.h 21
-rw-r--r-- vp8/common/generic/systemdependent.c 1
-rw-r--r-- vp8/common/invtrans.h 16
-rw-r--r-- vp8/common/mbpitch.c 5
-rw-r--r-- vp8/common/onyx.h 6
-rw-r--r-- vp8/common/onyxc_int.h 8
-rw-r--r-- vp8/common/postproc.c 275
-rw-r--r-- vp8/common/postproc.h 1
-rw-r--r-- vp8/common/ppc/systemdependent.c 1
-rw-r--r-- vp8/common/ppflags.h 3
-rw-r--r-- vp8/common/x86/idct_blk_mmx.c 6
-rw-r--r-- vp8/common/x86/loopfilter_sse2.asm 199
-rw-r--r-- vp8/common/x86/recon_sse2.asm 482
-rw-r--r-- vp8/common/x86/recon_wrapper_sse2.c 66
-rw-r--r-- vp8/common/x86/recon_x86.h 16
-rw-r--r-- vp8/common/x86/x86_systemdependent.c 9
-rw-r--r-- vp8/decoder/decodframe.c 58
-rw-r--r-- vp8/decoder/onyxd_if.c 1
-rw-r--r-- vp8/decoder/threading.c 35
-rw-r--r-- vp8/encoder/block.h 5
-rw-r--r-- vp8/encoder/encodeframe.c 6
-rw-r--r-- vp8/encoder/encodeintra.c 1
-rw-r--r-- vp8/encoder/ethreading.c 25
-rw-r--r-- vp8/encoder/onyx_if.c 138
-rw-r--r-- vp8/encoder/onyx_int.h 4
-rw-r--r-- vp8/encoder/pickinter.c 1
-rw-r--r-- vp8/encoder/quantize.c 114
-rw-r--r-- vp8/encoder/rdopt.c 1
-rw-r--r-- vp8/encoder/temporal_filter.c 1
-rw-r--r-- vp8/encoder/x86/quantize_ssse3.asm 42
-rw-r--r-- vp8/vp8_common.mk 1
-rw-r--r-- vp8/vp8_cx_iface.c 6
-rw-r--r-- vp8/vp8_dx_iface.c 23
-rw-r--r-- vp8_scalable_patterns.c 43
-rw-r--r-- vpx/vp8.h 1
-rw-r--r-- vpx/vp8dx.h 7
-rw-r--r-- vpx_ports/vpxtypes.h 5
-rw-r--r-- vpx_scale/arm/armv4/gen_scalers_armv4.asm 774
-rw-r--r-- vpx_scale/generic/vpxscale.c 4
-rw-r--r-- vpx_scale/include/arm/vpxscale_nofp.h 68
-rw-r--r-- vpx_scale/include/generic/vpxscale_nofp.h 51
-rw-r--r-- vpx_scale/include/symbian/vpxscale_nofp.h 68
-rw-r--r-- vpx_scale/include/vpxscale_nofp.h 16
-rw-r--r-- vpx_scale/vpxscale.h 13
-rw-r--r-- vpxdec.c 9
56 files changed, 1323 insertions, 1406 deletions
diff --git a/README b/README
index dddc5eae4..d5ef59823 100644
--- a/README
+++ b/README
@@ -44,15 +44,9 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv5te-linux-rvct
armv5te-linux-gcc
- armv5te-symbian-gcc
armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
- armv6-symbian-gcc
- iwmmxt-linux-rvct
- iwmmxt-linux-gcc
- iwmmxt2-linux-rvct
- iwmmxt2-linux-gcc
armv7-linux-rvct
armv7-linux-gcc
mips32-linux-gcc
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index cea967f93..c55ed0fe4 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -129,11 +129,14 @@ while (<STDIN>)
# ARM code
s/\sARM/.arm/g;
+ # eabi_attributes numerical equivalents can be found in the
+ # "ARM IHI 0045C" document.
+
# REQUIRE8 Stack is required to be 8-byte aligned
- s/\sREQUIRE8/.eabi_attribute Tag_ABI_align_needed, 1/g;
+ s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;
# PRESERVE8 Stack 8-byte align is preserved
- s/\sPRESERVE8/.eabi_attribute Tag_ABI_align_preserved, 1/g;
+ s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
# Use PROC and ENDP to give the symbols a .size directive.
# This makes them show up properly in debugging tools like gdb and valgrind.
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 0426f9220..6039a5066 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -603,8 +603,8 @@ process_common_toolchain() {
# Enable the architecture family
case ${tgt_isa} in
- arm*|iwmmxt*) enable arm;;
- mips*) enable mips;;
+ arm*) enable arm;;
+ mips*) enable mips;;
esac
# PIC is probably what we want when building shared libs
@@ -665,37 +665,25 @@ process_common_toolchain() {
# Process ARM architecture variants
case ${toolchain} in
- arm*|iwmmxt*)
- # on arm, isa versions are supersets
- enabled armv7a && soft_enable armv7 ### DEBUG
- enabled armv7 && soft_enable armv6
- enabled armv7 || enabled armv6 && soft_enable armv5te
- enabled armv7 || enabled armv6 && soft_enable fast_unaligned
- enabled iwmmxt2 && soft_enable iwmmxt
- enabled iwmmxt && soft_enable armv5te
+ arm*)
+ # on arm, isa versions are supersets
+ enabled armv7a && soft_enable armv7 ### DEBUG
+ enabled armv7 && soft_enable armv6
+ enabled armv7 || enabled armv6 && soft_enable armv5te
+ enabled armv7 || enabled armv6 && soft_enable fast_unaligned
- asm_conversion_cmd="cat"
+ asm_conversion_cmd="cat"
case ${tgt_cc} in
gcc)
- if enabled iwmmxt || enabled iwmmxt2
- then
- CROSS=${CROSS:-arm-iwmmxt-linux-gnueabi-}
- elif enabled symbian; then
- CROSS=${CROSS:-arm-none-symbianelf-}
- else
- CROSS=${CROSS:-arm-none-linux-gnueabi-}
- fi
+ CROSS=${CROSS:-arm-none-linux-gnueabi-}
link_with_cc=gcc
setup_gnu_toolchain
arch_int=${tgt_isa##armv}
arch_int=${arch_int%%te}
check_add_asflags --defsym ARCHITECTURE=${arch_int}
tune_cflags="-mtune="
- if enabled iwmmxt || enabled iwmmxt2
- then
- check_add_asflags -mcpu=${tgt_isa}
- elif enabled armv7
+ if enabled armv7
then
check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-ftree-vectorize
check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a
@@ -802,19 +790,6 @@ process_common_toolchain() {
fi
;;
- symbian*)
- enable symbian
- # Add the paths for the alternate libc
- for d in include/libc; do
- try_dir="${alt_libc}/${d}"
- [ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
- done
- for d in release/armv5/urel; do
- try_dir="${alt_libc}/${d}"
- [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
- done
- add_cflags -DIMPORT_C=
-
esac
;;
mips*)
diff --git a/configure b/configure
index 6f20c6b77..68708907b 100755
--- a/configure
+++ b/configure
@@ -83,16 +83,10 @@ EOF
all_platforms="${all_platforms} armv5te-linux-rvct"
all_platforms="${all_platforms} armv5te-linux-gcc"
all_platforms="${all_platforms} armv5te-none-rvct"
-all_platforms="${all_platforms} armv5te-symbian-gcc"
all_platforms="${all_platforms} armv6-darwin-gcc"
all_platforms="${all_platforms} armv6-linux-rvct"
all_platforms="${all_platforms} armv6-linux-gcc"
all_platforms="${all_platforms} armv6-none-rvct"
-all_platforms="${all_platforms} armv6-symbian-gcc"
-all_platforms="${all_platforms} iwmmxt-linux-rvct"
-all_platforms="${all_platforms} iwmmxt-linux-gcc"
-all_platforms="${all_platforms} iwmmxt2-linux-rvct"
-all_platforms="${all_platforms} iwmmxt2-linux-gcc"
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
@@ -198,8 +192,6 @@ ARCH_EXT_LIST="
armv5te
armv6
armv7
- iwmmxt
- iwmmxt2
mips32
diff --git a/examples/postproc.txt b/examples/postproc.txt
index 0940ea24c..51b251a04 100644
--- a/examples/postproc.txt
+++ b/examples/postproc.txt
@@ -58,7 +58,7 @@ if(frame_cnt%30 == 1) {
if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
die_codec(&codec, "Failed to turn off postproc");
} else if(frame_cnt%30 == 16) {
- vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK, 4, 0};
+ vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0};
if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
die_codec(&codec, "Failed to turn on postproc");
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 97a3559a4..b606aaca0 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -43,6 +43,8 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
+ if (oci->post_proc_buffer_int_used)
+ vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
vpx_free(oci->above_context);
vpx_free(oci->mip);
@@ -101,6 +103,8 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
return 1;
}
+ oci->post_proc_buffer_int_used = 0;
+
oci->mb_rows = height >> 4;
oci->mb_cols = width >> 4;
oci->MBs = oci->mb_rows * oci->mb_cols;
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index 1e2467411..89a2be825 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -11,7 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/arm.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/pragmas.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c
index 20a8ac4fc..7cf4bf943 100644
--- a/vp8/common/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c
@@ -23,22 +23,20 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
#if HAVE_ARMV7
-void vp8_dequantize_b_neon(BLOCKD *d)
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
vp8_dequantize_b_loop_neon(Q, DQC, DQ);
}
#endif
#if HAVE_ARMV6
-void vp8_dequantize_b_v6(BLOCKD *d)
+void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 99b731c78..b237206e6 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -184,7 +184,6 @@ typedef struct
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
- short *diff;
short *dequant;
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
@@ -203,12 +202,16 @@ typedef struct
typedef struct MacroBlockD
{
- DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
+ DECLARE_ALIGNED(16, short, dequant_y1[16]);
+ DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
+ DECLARE_ALIGNED(16, short, dequant_y2[16]);
+ DECLARE_ALIGNED(16, short, dequant_uv[16]);
+
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;
diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c
index 4a48a3192..96245162f 100644
--- a/vp8/common/dequantize.c
+++ b/vp8/common/dequantize.c
@@ -14,12 +14,11 @@
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
-void vp8_dequantize_b_c(BLOCKD *d)
+void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
- short *DQC = d->dequant;
for (i = 0; i < 16; i++)
{
diff --git a/vp8/common/dequantize.h b/vp8/common/dequantize.h
index f66cf2bac..429359190 100644
--- a/vp8/common/dequantize.h
+++ b/vp8/common/dequantize.h
@@ -14,7 +14,7 @@
#include "vp8/common/blockd.h"
#define prototype_dequant_block(sym) \
- void sym(BLOCKD *x)
+ void sym(BLOCKD *x, short *DQC)
#define prototype_dequant_idct_add(sym) \
void sym(short *input, short *dq, \
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
deleted file mode 100644
index 5f523980b..000000000
--- a/vp8/common/g_common.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-extern void (*vp8_clear_system_state)(void);
-extern void (*vp8_plane_add_noise)(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int DPitch, int q);
-extern void (*de_interlace)
-(
- unsigned char *src_ptr,
- unsigned char *dst_ptr,
- int Width,
- int Height,
- int Stride
-);
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index dbf8d6504..01d76206d 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 7eec58e26..f49e2e577 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -17,6 +17,10 @@
#include "blockd.h"
#include "onyxc_int.h"
+#if CONFIG_MULTITHREAD
+#include "vpx_mem/vpx_mem.h"
+#endif
+
static void eob_adjust(char *eobs, short *diff)
{
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
@@ -32,9 +36,7 @@ static void eob_adjust(char *eobs, short *diff)
static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
const VP8_COMMON_RTCD *rtcd)
{
- short *DQC = xd->block[0].dequant;
- /* save the dc dequant constant in case it is overridden */
- short dc_dequant_temp = DQC[0];
+ short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
@@ -51,15 +53,11 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
}
eob_adjust(xd->eobs, xd->qcoeff);
- /* override the dc dequant constant */
- DQC[0] = 1;
+ DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
- (xd->qcoeff, xd->block[0].dequant,
+ (xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
-
- /* restore the dc dequant constant */
- DQC[0] = dc_dequant_temp;
}
#endif
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index 11fa3ffa7..f8971d754 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -87,7 +87,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 4; c++)
{
- x->block[r*4+c].diff = &x->diff[r * 4 * 16 + c * 4];
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
}
}
@@ -96,7 +95,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 2; c++)
{
- x->block[16+r*2+c].diff = &x->diff[256 + r * 4 * 8 + c * 4];
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
}
@@ -106,14 +104,11 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
for (c = 0; c < 2; c++)
{
- x->block[20+r*2+c].diff = &x->diff[320+ r * 4 * 8 + c * 4];
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
}
}
- x->block[24].diff = &x->diff[384];
-
for (r = 0; r < 25; r++)
{
x->block[r].qcoeff = x->qcoeff + r * 16;
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index d17a32b82..eb7d5458d 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -147,10 +147,14 @@ extern "C"
int over_shoot_pct;
// buffering parameters
- int64_t starting_buffer_level; // in seconds
+ int64_t starting_buffer_level; // in bytes
int64_t optimal_buffer_level;
int64_t maximum_buffer_size;
+ int64_t starting_buffer_level_in_ms; // in milli-seconds
+ int64_t optimal_buffer_level_in_ms;
+ int64_t maximum_buffer_size_in_ms;
+
// controlling quality
int fixed_q;
int worst_allowed_q;
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index f733ff774..f91383de8 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -93,9 +93,9 @@ typedef struct VP8Common
{
struct vpx_internal_error_info error;
- DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
+ DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
+ DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
int Width;
int Height;
@@ -114,6 +114,8 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
+ YV12_BUFFER_CONFIG post_proc_buffer_int;
+ int post_proc_buffer_int_used;
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index ace4c113c..cb81cb52a 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -12,9 +12,12 @@
#include "vpx_config.h"
#include "vpx_scale/yv12config.h"
#include "postproc.h"
+#include "common.h"
+#include "recon.h"
#include "vpx_scale/yv12extend.h"
#include "vpx_scale/vpxscale.h"
#include "systemdependent.h"
+#include "../encoder/variance.h"
#include <math.h>
#include <stdlib.h>
@@ -26,6 +29,7 @@
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
/* global constants */
+#define MFQE_PRECISION 4
#if CONFIG_POSTPROC_VISUALIZER
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{
@@ -121,7 +125,6 @@ const short vp8_rv[] =
0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
};
-
extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
/***********************************************************************************************************
@@ -323,11 +326,11 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
}
void vp8_deblock(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag,
- vp8_postproc_rtcd_vtable_t *rtcd)
+ YV12_BUFFER_CONFIG *post,
+ int q,
+ int low_var_thresh,
+ int flag,
+ vp8_postproc_rtcd_vtable_t *rtcd)
{
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
@@ -671,6 +674,128 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
}
}
+int vp8_references_buffer( VP8_COMMON *oci, int ref_frame )
+{
+ const MODE_INFO *mi = oci->mi;
+ int mb_row, mb_col;
+
+ for (mb_row = 0; mb_row < oci->mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < oci->mb_cols; mb_col++,mi++)
+ {
+ if( mi->mbmi.ref_frame == ref_frame)
+ return 1;
+ }
+ mi++;
+ }
+ return 0;
+
+}
+
+static void multiframe_quality_enhance_block
+(
+ int blksize, /* Currently only values supported are 16, 8, 4 */
+ int qcurr,
+ int qprev,
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ int y_stride,
+ int uv_stride,
+ unsigned char *yd,
+ unsigned char *ud,
+ unsigned char *vd,
+ int yd_stride,
+ int uvd_stride
+)
+{
+ static const unsigned char VP8_ZEROS[16]=
+ {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+ };
+ int blksizeby2 = blksize >> 1;
+ int blksizesq = blksize * blksize;
+
+ int i, j;
+ unsigned char *yp;
+ unsigned char *ydp;
+ unsigned char *up;
+ unsigned char *udp;
+ unsigned char *vp;
+ unsigned char *vdp;
+
+ unsigned int act, sse, sad, thr;
+ if (blksize == 16)
+ {
+ act = vp8_variance_var16x16(y, y_stride, VP8_ZEROS, 0, &sse);
+ sad = vp8_variance_sad16x16(y, y_stride, yd, yd_stride, 0);
+ }
+ else if (blksize == 8)
+ {
+ act = vp8_variance_var8x8(y, y_stride, VP8_ZEROS, 0, &sse);
+ sad = vp8_variance_sad8x8(y, y_stride, yd, yd_stride, 0);
+ }
+ else
+ {
+ act = vp8_variance_var4x4(y, y_stride, VP8_ZEROS, 0, &sse);
+ sad = vp8_variance_sad4x4(y, y_stride, yd, yd_stride, 0);
+ }
+
+ thr = 6 * blksizesq + (act >> 3);
+ if (thr > 12 * blksizesq) thr = 12 * blksizesq;
+ // These thresholds should be adapted later based on qcurr and qprev
+ if (sad < thr)
+ {
+ static const int roundoff = (1 << (MFQE_PRECISION - 1));
+ int ifactor = (sad << MFQE_PRECISION) / thr;
+ // TODO: SIMD optimize this section
+ if (ifactor)
+ {
+ int icfactor = (1 << MFQE_PRECISION) - ifactor;
+ for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
+ {
+ for (j = 0; j < blksize; ++j)
+ ydp[j] = (int)((yp[j] * ifactor + ydp[j] * icfactor + roundoff) >> MFQE_PRECISION);
+ }
+ for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+ {
+ for (j = 0; j < blksizeby2; ++j)
+ udp[j] = (int)((up[j] * ifactor + udp[j] * icfactor + roundoff) >> MFQE_PRECISION);
+ }
+ for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+ {
+ for (j = 0; j < blksizeby2; ++j)
+ vdp[j] = (int)((vp[j] * ifactor + vdp[j] * icfactor + roundoff) >> MFQE_PRECISION);
+ }
+ }
+ }
+ else
+ {
+ if (blksize == 16)
+ {
+ vp8_recon_copy16x16(y, y_stride, yd, yd_stride);
+ vp8_recon_copy8x8(u, uv_stride, ud, uvd_stride);
+ vp8_recon_copy8x8(v, uv_stride, vd, uvd_stride);
+ }
+ else if (blksize == 8)
+ {
+ vp8_recon_copy8x8(y, y_stride, yd, yd_stride);
+ for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+ vpx_memcpy(udp, up, blksizeby2);
+ for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+ vpx_memcpy(vdp, vp, blksizeby2);
+ }
+ else
+ {
+ for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
+ vpx_memcpy(ydp, yp, blksize);
+ for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
+ vpx_memcpy(udp, up, blksizeby2);
+ for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
+ vpx_memcpy(vdp, vp, blksizeby2);
+ }
+ }
+}
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
@@ -678,6 +803,104 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
#define RTCD_VTABLE(oci) NULL
#endif
+void vp8_multiframe_quality_enhance
+(
+ VP8_COMMON *cm
+)
+{
+ YV12_BUFFER_CONFIG *show = cm->frame_to_show;
+ YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
+
+ FRAME_TYPE frame_type = cm->frame_type;
+ /* Point at base of Mb MODE_INFO list has motion vectors etc */
+ const MODE_INFO *mode_info_context = cm->mi;
+ int mb_row;
+ int mb_col;
+ int qcurr = cm->base_qindex;
+ int qprev = cm->postproc_state.last_base_qindex;
+
+ unsigned char *y_ptr, *u_ptr, *v_ptr;
+ unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
+
+ /* Set up the buffer pointers */
+ y_ptr = show->y_buffer;
+ u_ptr = show->u_buffer;
+ v_ptr = show->v_buffer;
+ yd_ptr = dest->y_buffer;
+ ud_ptr = dest->u_buffer;
+ vd_ptr = dest->v_buffer;
+
+ /* postprocess each macro block */
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ /* if motion is high there will likely be no benefit */
+ if (((frame_type == INTER_FRAME &&
+ abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 &&
+ abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) ||
+ (frame_type == KEY_FRAME)) &&
+ mode_info_context->mbmi.mode != B_PRED)
+ {
+ multiframe_quality_enhance_block(16,
+ qcurr,
+ qprev,
+ y_ptr,
+ u_ptr,
+ v_ptr,
+ show->y_stride,
+ show->uv_stride,
+ yd_ptr,
+ ud_ptr,
+ vd_ptr,
+ dest->y_stride,
+ dest->uv_stride);
+ }
+ else if (mode_info_context->mbmi.mode == B_PRED)
+ {
+ int i, j;
+ for (i=0; i<2; ++i)
+ for (j=0; j<2; ++j)
+ multiframe_quality_enhance_block(8,
+ qcurr,
+ qprev,
+ y_ptr + 8*(i*show->y_stride+j),
+ u_ptr + 4*(i*show->uv_stride+j),
+ v_ptr + 4*(i*show->uv_stride+j),
+ show->y_stride,
+ show->uv_stride,
+ yd_ptr + 8*(i*dest->y_stride+j),
+ ud_ptr + 4*(i*dest->uv_stride+j),
+ vd_ptr + 4*(i*dest->uv_stride+j),
+ dest->y_stride,
+ dest->uv_stride);
+ }
+ else
+ {
+ vp8_recon_copy16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
+ vp8_recon_copy8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
+ vp8_recon_copy8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
+ }
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+ yd_ptr += 16;
+ ud_ptr += 8;
+ vd_ptr += 8;
+ mode_info_context++; /* step to next MB */
+ }
+
+ y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
+ u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+ v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+ yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
+ ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+ vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+
+ mode_info_context++; /* Skip border mb */
+ }
+}
+
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
{
int q = oci->filter_level * 10 / 6;
@@ -699,27 +922,65 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
dest->y_width = oci->Width;
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;
+ oci->postproc_state.last_base_qindex = oci->base_qindex;
return 0;
+ }
+ /* Allocate post_proc_buffer_int if needed */
+ if ((flags & VP8D_MFQE) && !oci->post_proc_buffer_int_used)
+ {
+ if ((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK))
+ {
+ if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int, oci->Width, oci->Height, VP8BORDERINPIXELS) >= 0)
+ {
+ oci->post_proc_buffer_int_used = 1;
+ }
+ }
}
#if ARCH_X86||ARCH_X86_64
vpx_reset_mmx_state();
#endif
- if (flags & VP8D_DEMACROBLOCK)
+ if ((flags & VP8D_MFQE) &&
+ oci->current_video_frame >= 2 &&
+ oci->base_qindex - oci->postproc_state.last_base_qindex >= 10)
+ {
+ vp8_multiframe_quality_enhance(oci);
+ if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) &&
+ oci->post_proc_buffer_int_used)
+ {
+ vp8_yv12_copy_frame_ptr(&oci->post_proc_buffer, &oci->post_proc_buffer_int);
+ if (flags & VP8D_DEMACROBLOCK)
+ {
+ vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer,
+ q + (deblock_level - 5) * 10, 1, 0, RTCD_VTABLE(oci));
+ }
+ else if (flags & VP8D_DEBLOCK)
+ {
+ vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer,
+ q, 1, 0, RTCD_VTABLE(oci));
+ }
+ }
+ /* Move partially towards the base q of the previous frame */
+ oci->postproc_state.last_base_qindex = (3*oci->postproc_state.last_base_qindex + oci->base_qindex)>>2;
+ }
+ else if (flags & VP8D_DEMACROBLOCK)
{
vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer,
q + (deblock_level - 5) * 10, 1, 0, RTCD_VTABLE(oci));
+ oci->postproc_state.last_base_qindex = oci->base_qindex;
}
else if (flags & VP8D_DEBLOCK)
{
vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer,
q, 1, 0, RTCD_VTABLE(oci));
+ oci->postproc_state.last_base_qindex = oci->base_qindex;
}
else
{
vp8_yv12_copy_frame_ptr(oci->frame_to_show, &oci->post_proc_buffer);
+ oci->postproc_state.last_base_qindex = oci->base_qindex;
}
if (flags & VP8D_ADDNOISE)
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index c641b9ca5..d5aaf6216 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -104,6 +104,7 @@ struct postproc_state
int last_q;
int last_noise;
char noise[3072];
+ int last_base_qindex;
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 1f5d79068..7046a63e8 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -9,7 +9,6 @@
*/
-#include "g_common.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 65b0cab6a..665e21fd9 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -23,7 +23,8 @@ enum
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
VP8D_DEBUG_DRAW_MV = 1<<7,
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
- VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
+ VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
+ VP8D_MFQE = 1<<10
};
typedef struct
diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
index 49cebd6f5..8ff483708 100644
--- a/vp8/common/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -14,12 +14,12 @@
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
-void vp8_dequantize_b_mmx(BLOCKD *d)
+void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
{
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
- short *q = (short *) d->dequant;
- vp8_dequantize_b_impl_mmx(sq, dq, q);
+
+ vp8_dequantize_b_impl_mmx(sq, dq, DQC);
}
void vp8_dequant_idct_add_y_block_mmx
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 86927d9f1..2ad010adb 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1385,52 +1385,54 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
- push rsi
- push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
+ mov rcx, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- mov rdx, arg(2) ;blimit
- movdqa xmm3, XMMWORD PTR [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
+ lea rdx, [rcx + rax]
neg rax
; calculate mask
- movdqa xmm1, [rsi+2*rax] ; p1
- movdqa xmm0, [rdi] ; q1
+ movdqa xmm0, [rdx] ; q1
+ mov rdx, arg(2) ;blimit
+ movdqa xmm1, [rcx+2*rax] ; p1
+
movdqa xmm2, xmm1
movdqa xmm7, xmm0
- movdqa xmm4, xmm0
+
psubusb xmm0, xmm1 ; q1-=p1
- psubusb xmm1, xmm4 ; p1-=q1
+ psubusb xmm1, xmm7 ; p1-=q1
por xmm1, xmm0 ; abs(p1-q1)
pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm1, 1 ; abs(p1-q1)/2
- movdqa xmm5, [rsi+rax] ; p0
- movdqa xmm4, [rsi] ; q0
+ movdqa xmm3, XMMWORD PTR [rdx]
+
+ movdqa xmm5, [rcx+rax] ; p0
+ movdqa xmm4, [rcx] ; q0
movdqa xmm0, xmm4 ; q0
movdqa xmm6, xmm5 ; p0
psubusb xmm5, xmm4 ; p0-=q0
psubusb xmm4, xmm6 ; q0-=p0
por xmm5, xmm4 ; abs(p0 - q0)
+
+ movdqa xmm4, [GLOBAL(t80)]
+
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
+
; start work on filters
- pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
+ pxor xmm2, xmm4 ; p1 offset to convert to signed values
+ pxor xmm7, xmm4 ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1
- pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values
+ pxor xmm6, xmm4 ; offset to convert to signed values
+ pxor xmm0, xmm4 ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0
paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0)
@@ -1438,42 +1440,36 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0)
pand xmm5, xmm2 ; mask filter values we don't care about
- ; do + 4 side
- paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
-
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
- movdqa xmm1, xmm5 ; get a copy of filters
- psraw xmm1, 11 ; arithmetic shift right 11
- psllw xmm1, 8 ; shift left 8 to put it back
-
- por xmm0, xmm1 ; put the two together to get result
+ paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
+ movdqa xmm0, xmm5
+ psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
- psubsb xmm3, xmm0 ; q0-= q0 add
- pxor xmm3, [GLOBAL(t80)] ; unoffset
- movdqa [rsi], xmm3 ; write back
+ movdqa xmm1, [GLOBAL(te0)]
+ movdqa xmm2, [GLOBAL(t1f)]
- ; now do +3 side
- psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm0 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm0, 3
+ pand xmm0, xmm2 ;clear out upper 3 bits
+ por xmm0, xmm7 ;add sign
+ psubsb xmm3, xmm0 ; q0-= q0sz add
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
- psraw xmm5, 11 ; arithmetic shift right 11
- psllw xmm5, 8 ; shift left 8 to put it back
- por xmm0, xmm5 ; put the two together to get result
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm5 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm5, 3
+ pand xmm5, xmm2 ;clear out upper 3 bits
+ por xmm5, xmm7 ;add sign
+ paddsb xmm6, xmm5 ; p0+= p0 add
+ pxor xmm3, xmm4 ; unoffset
+ movdqa [rcx], xmm3 ; write back
- paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [GLOBAL(t80)] ; unoffset
- movdqa [rsi+rax], xmm6 ; write back
+ pxor xmm6, xmm4 ; unoffset
+ movdqa [rcx+rax], xmm6 ; write back
; begin epilog
- pop rdi
- pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
@@ -1536,9 +1532,6 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
- movdqa t0, xmm0 ; save to t0
- movdqa t1, xmm2 ; save to t1
-
lea rsi, [rsi + rax*8]
lea rdi, [rsi + rax]
lea rdx, [rsi + rax*4]
@@ -1551,26 +1544,24 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80
punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90
- movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0
+ movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0
movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0
- movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0
+ movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0
movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0
- punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0
- punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0
+ punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0
+ punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0
punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80
- punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
+ punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0
- movdqa xmm1, xmm4
- punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
- punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+ movdqa xmm7, xmm4
+ punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+ punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
movdqa xmm6, xmm4
- punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
- punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+ punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+ punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
- movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
- movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
movdqa xmm1, xmm0
movdqa xmm3, xmm2
@@ -1579,6 +1570,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+ mov rdx, arg(2) ;blimit
+
; calculate mask
movdqa xmm6, xmm0 ; p1
movdqa xmm7, xmm3 ; q1
@@ -1588,6 +1581,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw xmm6, 1 ; abs(p1-q1)/2
+ movdqa xmm7, [rdx]
+
movdqa xmm5, xmm1 ; p0
movdqa xmm4, xmm2 ; q0
psubusb xmm5, xmm2 ; p0-=q0
@@ -1596,8 +1591,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
- mov rdx, arg(2) ;blimit
- movdqa xmm7, XMMWORD PTR [rdx]
+ movdqa xmm4, [GLOBAL(t80)]
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
@@ -1607,59 +1601,48 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
movdqa t0, xmm0
movdqa t1, xmm3
- pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values
-
+ pxor xmm0, xmm4 ; p1 offset to convert to signed values
+ pxor xmm3, xmm4 ; q1 offset to convert to signed values
psubsb xmm0, xmm3 ; p1 - q1
- movdqa xmm6, xmm1 ; p0
-
- movdqa xmm7, xmm2 ; q0
- pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values
- movdqa xmm3, xmm7 ; offseted ; q0
-
- psubsb xmm7, xmm6 ; q0 - p0
- paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0)
+ movdqa xmm6, xmm1 ; p0
+; movdqa xmm7, xmm2 ; q0
- paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0)
- paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0)
+ pxor xmm6, xmm4 ; offset to convert to signed values
+ pxor xmm2, xmm4 ; offset to convert to signed values
+ movdqa xmm3, xmm2 ; offseted ; q0
+ psubsb xmm2, xmm6 ; q0 - p0
+ paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0)
+ paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0)
+ paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0)
pand xmm5, xmm0 ; mask filter values we don't care about
-
paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4
-
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
-
- psraw xmm0, 3 ; arithmetic shift right 11
- psrlw xmm0, 8
-
- movdqa xmm7, xmm5 ; get a copy of filters
- psraw xmm7, 11 ; arithmetic shift right 11
-
- psllw xmm7, 8 ; shift left 8 to put it back
- por xmm0, xmm7 ; put the two together to get result
-
- psubsb xmm3, xmm0 ; q0-= q0sz add
- pxor xmm3, [GLOBAL(t80)] ; unoffset q0
-
- ; now do +3 side
+ movdqa xmm0, xmm5
psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4
- movdqa xmm0, xmm5 ; get a copy of filters
- psllw xmm0, 8 ; shift left 8
- psraw xmm0, 3 ; arithmetic shift right 11
+ movdqa xmm1, [GLOBAL(te0)]
+ movdqa xmm2, [GLOBAL(t1f)]
- psrlw xmm0, 8
- psraw xmm5, 11 ; arithmetic shift right 11
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm0 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm0, 3
+ pand xmm0, xmm2 ;clear out upper 3 bits
+ por xmm0, xmm7 ;add sign
+ psubsb xmm3, xmm0 ; q0-= q0sz add
- psllw xmm5, 8 ; shift left 8 to put it back
- por xmm0, xmm5 ; put the two together to get result
+ pxor xmm7, xmm7
+ pcmpgtb xmm7, xmm5 ;save sign
+ pand xmm7, xmm1 ;preserve the upper 3 bits
+ psrlw xmm5, 3
+ pand xmm5, xmm2 ;clear out upper 3 bits
+ por xmm5, xmm7 ;add sign
+ paddsb xmm6, xmm5 ; p0+= p0 add
- paddsb xmm6, xmm0 ; p0+= p0 add
- pxor xmm6, [GLOBAL(t80)] ; unoffset p0
+ pxor xmm3, xmm4 ; unoffset q0
+ pxor xmm6, xmm4 ; unoffset p0
movdqa xmm0, t0 ; p1
movdqa xmm4, t1 ; q1
@@ -1763,3 +1746,9 @@ s9:
align 16
s63:
times 8 dw 0x003f
+align 16
+te0:
+ times 16 db 0xe0
+align 16
+t1f:
+ times 16 db 0x1f
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index a82c1b4fd..4b68ef5f2 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -559,12 +559,492 @@ sym(vp8_intra_pred_uv_ho_%1):
vp8_intra_pred_uv_ho mmx2
vp8_intra_pred_uv_ho ssse3
+;void vp8_intra_pred_y_dc_sse2( ; DC predictor using both the row above and the column to the left
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; reads the 16 bytes at src-src_stride and the bytes at src-1 of 16 rows
+; int src_stride ; byte distance between source rows
+; ) ; fills 16 rows of 16 bytes with (top sum + left sum + 16) >> 5
+global sym(vp8_intra_pred_y_dc_sse2)
+sym(vp8_intra_pred_y_dc_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from top: sum the 16 bytes of the row above the block
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi -> row above the block
+ pxor xmm0, xmm0 ; zero, so psadbw yields plain byte sums
+ movdqa xmm1, [rsi] ; aligned load of the 16 top pixels
+ psadbw xmm1, xmm0 ; one sum per 8-byte half
+ movq xmm2, xmm1 ; keep the low-half sum
+ punpckhqdq xmm1, xmm1 ; bring the high-half sum down
+ paddw xmm1, xmm2 ; low word = sum of all 16 top pixels
+
+ ; from left: accumulate the byte at column -1 of rows 0..15 in ecx
+ dec rsi ; rsi -> top-left neighbour (row -1, column -1)
+ lea rdi, [rax*3] ; rdi = 3 * src_stride
+ movzx ecx, byte [rsi+rax] ; row 0
+ movzx edx, byte [rsi+rax*2] ; row 1
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; row 2
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 3
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 7
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 11
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*4] ; row 15
+ add ecx, edx
+
+ ; add up: (top + left + 16) >> 5, then replicate into all 16 bytes of xmm1
+ pextrw edx, xmm1, 0x0 ; top-row sum
+ lea edx, [edx+ecx+16] ; add left sum and rounding term
+ sar edx, 5 ; divide by the 32 contributing pixels
+ movd xmm1, edx
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; word 0 -> low four words
+ punpcklqdq xmm1, xmm1 ; -> all eight words
+ packuswb xmm1, xmm1 ; -> sixteen identical bytes
+
+ ; write out
+ mov rsi, 2 ; two passes of eight rows
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label ; local label: each pass stores eight rows
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dctop_sse2( ; DC predictor that uses only the row above the block
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; only the 16 bytes at src-src_stride are read
+; int src_stride ; byte distance between source rows
+; ) ; fills 16 rows of 16 bytes with (top sum + 8) >> 4
+global sym(vp8_intra_pred_y_dctop_sse2)
+sym(vp8_intra_pred_y_dctop_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ GET_GOT rbx
+ ; end prolog
+
+ ; from top: sum the 16 bytes of the row above the block
+ mov rcx, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rcx, rax ; rcx -> row above the block
+ pxor xmm0, xmm0 ; zero, so psadbw yields plain byte sums
+ movdqa xmm1, [rcx] ; aligned load of the 16 top pixels
+ psadbw xmm1, xmm0 ; one sum per 8-byte half
+ movdqa xmm2, xmm1
+ punpckhqdq xmm1, xmm1 ; bring the high-half sum down
+ paddw xmm1, xmm2 ; low word = sum of all 16 top pixels
+
+ ; add up: (sum + 8) >> 4, then replicate into all 16 bytes of xmm1
+ paddw xmm1, [GLOBAL(dc_8)] ; rounding term
+ psraw xmm1, 4 ; divide by the 16 contributing pixels
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; word 0 -> low four words
+ punpcklqdq xmm1, xmm1 ; -> all eight words
+ packuswb xmm1, xmm1 ; -> sixteen identical bytes
+
+ ; write out
+ mov rsi, 2 ; two passes of eight rows
+ mov rdx, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label ; local label: each pass stores eight rows
+ movdqa [rdx ], xmm1
+ movdqa [rdx+rcx ], xmm1
+ movdqa [rdx+rcx*2], xmm1
+ movdqa [rdx+rax ], xmm1
+ lea rdx, [rdx+rcx*4]
+ movdqa [rdx ], xmm1
+ movdqa [rdx+rcx ], xmm1
+ movdqa [rdx+rcx*2], xmm1
+ movdqa [rdx+rax ], xmm1
+ lea rdx, [rdx+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dcleft_sse2( ; DC predictor that uses only the column to the left of the block
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; only the bytes at src-1 of 16 rows are read
+; int src_stride ; byte distance between source rows
+; ) ; fills 16 rows of 16 bytes with (left sum + 8) >> 4
+global sym(vp8_intra_pred_y_dcleft_sse2)
+sym(vp8_intra_pred_y_dcleft_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from left: accumulate the byte at column -1 of rows 0..15 in ecx
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ dec rsi ; rsi -> left neighbour of row 0
+ lea rdi, [rax*3] ; rdi = 3 * src_stride
+ movzx ecx, byte [rsi] ; row 0
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 4
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 8
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4] ; rsi -> row 12
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi] ; row 15
+ lea edx, [ecx+edx+8] ; final add fused with the rounding term
+
+ ; add up: (sum + 8) >> 4, then replicate into all 16 bytes of xmm1
+ shr edx, 4 ; divide by the 16 contributing pixels
+ movd xmm1, edx
+ ; FIXME use pshufb for ssse3 version
+ pshuflw xmm1, xmm1, 0x0 ; word 0 -> low four words
+ punpcklqdq xmm1, xmm1 ; -> all eight words
+ packuswb xmm1, xmm1 ; -> sixteen identical bytes
+
+ ; write out
+ mov rsi, 2 ; two passes of eight rows
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3] ; rax = 3 * dst_stride
+
+.label ; local label: each pass stores eight rows
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ movdqa [rdi ], xmm1
+ movdqa [rdi+rcx ], xmm1
+ movdqa [rdi+rcx*2], xmm1
+ movdqa [rdi+rax ], xmm1
+ lea rdi, [rdi+rcx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_dc128_sse2( ; DC predictor for when no neighbouring pixels are used
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; not read by this routine
+; int src_stride ; not read by this routine
+; ) ; fills 16 rows of 16 bytes with the dc_128 constant
+global sym(vp8_intra_pred_y_dc128_sse2)
+sym(vp8_intra_pred_y_dc128_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ GET_GOT rbx
+ ; end prolog
+
+ ; write out
+ mov rsi, 2 ; two passes of eight rows
+ movdqa xmm1, [GLOBAL(dc_128)] ; 16-byte fill pattern from the read-only data section
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3] ; rcx = 3 * dst_stride
+
+.label ; local label: each pass stores eight rows
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_tm_sse2( ; also instantiated as vp8_intra_pred_y_tm_ssse3 by the macro below
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; reads the row above, the top-left byte and the bytes at src-1 of 16 rows
+; int src_stride ; byte distance between source rows
+; ) ; each output byte = saturate_u8(left[row] + top[col] - topleft)
+%macro vp8_intra_pred_y_tm 1
+global sym(vp8_intra_pred_y_tm_%1)
+sym(vp8_intra_pred_y_tm_%1):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ GET_GOT rbx
+ ; end prolog
+
+ ; read top row and widen it to 16-bit words
+ mov edx, 8 ; loop counter: eight passes of two rows
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax ; rsi -> row above the block
+ pxor xmm0, xmm0 ; zero, used to widen bytes to words
+%ifidn %1, ssse3
+ movdqa xmm3, [GLOBAL(dc_1024)]
+%endif
+ movdqa xmm1, [rsi] ; aligned load of the 16 top pixels
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm0 ; top[0..7] as words
+ punpckhbw xmm2, xmm0 ; top[8..15] as words
+
+ ; set up left ptrs and subtract topleft
+ movd xmm4, [rsi-1] ; byte 0 = top-left neighbour
+ lea rsi, [rsi+rax-1] ; rsi -> left neighbour of row 0
+%ifidn %1, sse2
+ punpcklbw xmm4, xmm0
+ pshuflw xmm4, xmm4, 0x0
+ punpcklqdq xmm4, xmm4
+%else
+ pshufb xmm4, xmm3
+%endif
+ psubw xmm1, xmm4 ; top[0..7] - topleft
+ psubw xmm2, xmm4 ; top[8..15] - topleft
+
+ ; set up dest ptrs
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+vp8_intra_pred_y_tm_%1_loop:
+ movd xmm4, [rsi] ; byte 0 = left pixel of this row
+ movd xmm5, [rsi+rax] ; byte 0 = left pixel of the next row
+%ifidn %1, sse2
+ punpcklbw xmm4, xmm0
+ punpcklbw xmm5, xmm0
+ pshuflw xmm4, xmm4, 0x0
+ pshuflw xmm5, xmm5, 0x0
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+%else
+ pshufb xmm4, xmm3
+ pshufb xmm5, xmm3
+%endif
+ movdqa xmm6, xmm4 ; second copy of the broadcast left pixel for columns 8..15
+ movdqa xmm7, xmm5
+ paddw xmm4, xmm1 ; left + (top - topleft), columns 0..7
+ paddw xmm6, xmm2 ; columns 8..15
+ paddw xmm5, xmm1
+ paddw xmm7, xmm2
+ packuswb xmm4, xmm6 ; saturate the 16 words to unsigned bytes
+ packuswb xmm5, xmm7
+ movdqa [rdi ], xmm4
+ movdqa [rdi+rcx], xmm5
+ lea rsi, [rsi+rax*2] ; advance two source rows
+ lea rdi, [rdi+rcx*2] ; advance two destination rows
+ dec edx
+ jnz vp8_intra_pred_y_tm_%1_loop
+
+ ; begin epilog
+ RESTORE_GOT
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%endmacro
+
+vp8_intra_pred_y_tm sse2
+vp8_intra_pred_y_tm ssse3
+
+;void vp8_intra_pred_y_ve_sse2( ; vertical predictor: repeats the row above the block
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; only the 16 bytes at src-src_stride are read
+; int src_stride ; byte distance between source rows
+; ) ; copies that row into each of the 16 destination rows
+global sym(vp8_intra_pred_y_ve_sse2)
+sym(vp8_intra_pred_y_ve_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ ; end prolog
+
+ ; read from top
+ mov rax, arg(2) ;src;
+ movsxd rdx, dword ptr arg(3) ;src_stride;
+ sub rax, rdx ; rax -> row above the block
+ movdqa xmm1, [rax] ; aligned load of the 16 top pixels
+
+ ; write out
+ mov rsi, 2 ; two passes of eight rows
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3] ; rcx = 3 * dst_stride
+
+.label ; local label: each pass stores eight rows
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ movdqa [rax ], xmm1
+ movdqa [rax+rdx ], xmm1
+ movdqa [rax+rdx*2], xmm1
+ movdqa [rax+rcx ], xmm1
+ lea rax, [rax+rdx*4]
+ dec rsi
+ jnz .label
+
+ ; begin epilog
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_y_ho_sse2( ; horizontal predictor: repeats each row's left neighbour
+; unsigned char *dst, ; destination; written with aligned 16-byte stores (movdqa)
+; int dst_stride, ; byte distance between destination rows
+; unsigned char *src, ; block origin; only the bytes at src-1 of 16 rows are used
+; int src_stride ; byte distance between source rows
+; ) ; fills each of the 16 rows with the byte at src-1 of that row
+global sym(vp8_intra_pred_y_ho_sse2)
+sym(vp8_intra_pred_y_ho_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; read from left and write out
+ mov edx, 8 ; loop counter: eight passes of two rows
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ dec rsi ; rsi -> left neighbour of row 0
+
+vp8_intra_pred_y_ho_sse2_loop:
+ movd xmm0, [rsi] ; byte 0 = left pixel of this row
+ movd xmm1, [rsi+rax] ; byte 0 = left pixel of the next row
+ ; FIXME use pshufb for ssse3 version
+ punpcklbw xmm0, xmm0 ; duplicate the byte into both halves of word 0
+ punpcklbw xmm1, xmm1
+ pshuflw xmm0, xmm0, 0x0 ; word 0 -> low four words
+ pshuflw xmm1, xmm1, 0x0
+ punpcklqdq xmm0, xmm0 ; -> sixteen identical bytes
+ punpcklqdq xmm1, xmm1
+ movdqa [rdi ], xmm0
+ movdqa [rdi+rcx], xmm1
+ lea rsi, [rsi+rax*2] ; advance two source rows
+ lea rdi, [rdi+rcx*2] ; advance two destination rows
+ dec edx
+ jnz vp8_intra_pred_y_ho_sse2_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
SECTION_RODATA
+align 16
dc_128:
- times 8 db 128
+ times 16 db 128
dc_4:
times 4 dw 4
align 16
+dc_8:
+ times 8 dw 8
+align 16
dc_1024:
times 8 dw 0x400
align 16
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index fcc75a901..44221cd0b 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
+
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2); /* luma predictors; declared by reusing the mbuv prototype macro */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2); /* DC from the row above only */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2); /* DC from the left column only */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2); /* DC with no neighbours */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2); /* horizontal */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2); /* vertical */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2); /* TM, SSE2 variant */
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3); /* TM, SSSE3 variant */
+
+static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, /* picks the luma predictor for the macroblock's mode and calls it */
+ unsigned char *dst_y, /* where the prediction is written */
+ int dst_stride, /* forwarded as the predictor's dst_stride */
+ build_intra_predictors_mbuv_fn_t tm_func) /* predictor to use for TM_PRED */
+{
+ int mode = x->mode_info_context->mbmi.mode;
+ build_intra_predictors_mbuv_fn_t fn;
+ int src_stride = x->dst.y_stride;
+ switch (mode) {
+ case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break;
+ case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break;
+ case TM_PRED: fn = tm_func; break;
+ case DC_PRED: /* DC variant depends on which neighbours are available */
+ if (x->up_available) {
+ if (x->left_available) {
+ fn = vp8_intra_pred_y_dc_sse2; break;
+ } else {
+ fn = vp8_intra_pred_y_dctop_sse2; break;
+ }
+ } else if (x->left_available) {
+ fn = vp8_intra_pred_y_dcleft_sse2; break;
+ } else {
+ fn = vp8_intra_pred_y_dc128_sse2; break;
+ }
+ break; /* not reached: every branch above already leaves the switch */
+ default: return; /* any other mode: nothing is written */
+ }
+
+ fn(dst_y, dst_stride, x->dst.y_buffer, src_stride); /* source is always the frame buffer at x->dst.y_buffer */
+ return;
+}
+
+void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x) /* predicts into x->predictor with stride 16, SSE2 TM predictor */
+{
+ vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+ vp8_intra_pred_y_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x) /* predicts into x->predictor with stride 16, SSSE3 TM predictor */
+{
+ vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
+ vp8_intra_pred_y_tm_ssse3);
+}
+
+void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x) /* predicts in place into x->dst.y_buffer, SSE2 TM predictor */
+{
+ vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+ vp8_intra_pred_y_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x) /* predicts in place into x->dst.y_buffer, SSSE3 TM predictor */
+{
+ vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
+ vp8_intra_pred_y_tm_ssse3);
+
+}
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index fbb3dcb63..afacc60d1 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -42,6 +42,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx);
extern prototype_copy_block(vp8_copy_mem16x16_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_copy16x16
@@ -53,12 +55,20 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
+#undef vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_sse2
+
+#undef vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_sse2
+
#endif
#endif
#if HAVE_SSSE3
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_ssse3);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_build_intra_predictors_mbuv
@@ -67,6 +77,12 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3)
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
+#undef vp8_recon_build_intra_predictors_mby
+#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_ssse3
+
+#undef vp8_recon_build_intra_predictors_mby_s
+#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3
+
#endif
#endif
#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index ad3a1f76b..e1e1b7987 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -11,7 +11,6 @@
#include "vpx_config.h"
#include "vpx_ports/x86.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
@@ -86,6 +85,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mbuv_sse2;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_sse2;
+ rtcd->recon.build_intra_predictors_mby =
+ vp8_build_intra_predictors_mby_sse2;
+ rtcd->recon.build_intra_predictors_mby_s =
+ vp8_build_intra_predictors_mby_s_sse2;
rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
@@ -132,6 +135,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mbuv_ssse3;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_ssse3;
+ rtcd->recon.build_intra_predictors_mby =
+ vp8_build_intra_predictors_mby_ssse3;
+ rtcd->recon.build_intra_predictors_mby_s =
+ vp8_build_intra_predictors_mby_s_ssse3;
}
#endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 11d0e38f5..917aeceb6 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -42,7 +42,6 @@
void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
{
- int i;
int Q;
VP8_COMMON *const pc = & pbi->common;
@@ -52,15 +51,9 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
- /* all the ac values = ; */
- for (i = 1; i < 16; i++)
- {
- int rc = vp8_default_zig_zag1d[i];
-
- pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
- pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
- pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
- }
+ pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q);
+ pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+ pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
}
}
@@ -88,19 +81,19 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
else
QIndex = pc->base_qindex;
- /* Set up the block level dequant pointers */
- for (i = 0; i < 16; i++)
- {
- xd->block[i].dequant = pc->Y1dequant[QIndex];
- }
+ /* Set up the macroblock dequant constants */
+ xd->dequant_y1_dc[0] = 1;
+ xd->dequant_y1[0] = pc->Y1dequant[QIndex][0];
+ xd->dequant_y2[0] = pc->Y2dequant[QIndex][0];
+ xd->dequant_uv[0] = pc->UVdequant[QIndex][0];
- for (i = 16; i < 24; i++)
+ for (i = 1; i < 16; i++)
{
- xd->block[i].dequant = pc->UVdequant[QIndex];
+ xd->dequant_y1_dc[i] =
+ xd->dequant_y1[i] = pc->Y1dequant[QIndex][1];
+ xd->dequant_y2[i] = pc->Y2dequant[QIndex][1];
+ xd->dequant_uv[i] = pc->UVdequant[QIndex][1];
}
-
- xd->block[24].dequant = pc->Y2dequant[QIndex];
-
}
#if CONFIG_RUNTIME_CPU_DETECT
@@ -180,6 +173,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
else
{
+ short *DQC = xd->dequant_y1;
+
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
vpx_memset(xd->eobs, 0, 25);
@@ -200,13 +195,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
- (b->qcoeff, b->dequant,
+ (b->qcoeff, DQC,
*(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
- (b->qcoeff[0] * b->dequant[0],
+ (b->qcoeff[0] * DQC[0],
*(b->base_dst) + b->dst, b->dst_stride,
*(b->base_dst) + b->dst, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
@@ -233,10 +228,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* dequantization and idct */
if (mode != B_PRED)
{
- short *DQC = xd->block[0].dequant;
-
- /* save the dc dequant constant in case it is overridden */
- short dc_dequant_temp = DQC[0];
+ short *DQC = xd->dequant_y1;
if (mode != SPLITMV)
{
@@ -245,7 +237,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b,
+ xd->dequant_y2);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -260,7 +253,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
else
{
- b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
+ b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],
xd->qcoeff);
((int *)b->qcoeff)[0] = 0;
@@ -269,20 +262,17 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* override the dc dequant constant in order to preserve the
* dc components
*/
- DQC[0] = 1;
+ DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
- (xd->qcoeff, xd->block[0].dequant,
+ (xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
-
- /* restore the dc dequant constant */
- DQC[0] = dc_dequant_temp;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
- (xd->qcoeff+16*16, xd->block[16].dequant,
+ (xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 9a45702cf..80648d39f 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -20,7 +20,6 @@
#include "vpx_scale/yv12extend.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/swapyv12buffer.h"
-#include "vp8/common/g_common.h"
#include "vp8/common/threading.h"
#include "decoderthreading.h"
#include <stdio.h>
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 947b3a1c6..2ce00f705 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -37,7 +37,7 @@ extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
VP8_COMMON *const pc = & pbi->common;
- int i, j;
+ int i;
for (i = 0; i < count; i++)
{
@@ -77,10 +77,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->current_bc = &pbi->bc2;
- for (j = 0; j < 25; j++)
- {
- mbd->block[j].dequant = xd->block[j].dequant;
- }
+ vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+ vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
+ vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
+ vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
mbd->fullpixel_mask = 0xffffffff;
if(pc->full_pixel)
@@ -177,6 +177,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* dequantization and idct */
if (xd->mode_info_context->mbmi.mode == B_PRED)
{
+ short *DQC = xd->dequant_y1;
+
for (i = 0; i < 16; i++)
{
BLOCKD *b = &xd->block[i];
@@ -190,13 +192,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
- (b->qcoeff, b->dequant,
+ (b->qcoeff, DQC,
*(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
- (b->qcoeff[0] * b->dequant[0],
+ (b->qcoeff[0] * DQC[0],
*(b->base_dst) + b->dst, b->dst_stride,
*(b->base_dst) + b->dst, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
@@ -206,9 +208,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
else
{
- short *DQC = xd->block[0].dequant;
-
- DECLARE_ALIGNED(16, short, local_dequant[16]);
+ short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
@@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
+ DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, xd->dequant_y2);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -232,20 +232,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
else
{
- b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
+ b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff);
((int *)b->qcoeff)[0] = 0;
}
- /* make a local copy of the dequant constants */
- vpx_memcpy(local_dequant, xd->block[0].dequant,
- sizeof(local_dequant));
-
/* override the dc dequant constant */
- local_dequant[0] = 1;
-
- /* use the new dequant constants */
- DQC = local_dequant;
+ DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
@@ -255,7 +248,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
- (xd->qcoeff+16*16, xd->block[16].dequant,
+ (xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 5e5a60db7..0a74ca46d 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -45,10 +45,6 @@ typedef struct
unsigned char **base_src;
int src;
int src_stride;
-
-// MV enc_mv;
- int force_empty;
-
} BLOCK;
typedef struct
@@ -107,7 +103,6 @@ typedef struct
int mv_row_min;
int mv_row_max;
- int vector_range; // Used to monitor limiting range of recent vectors to guide search.
int skip;
int encode_breakout;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 88868d684..b5c5c7445 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -595,8 +595,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi)
// Activity map pointer
x->mb_activity_ptr = cpi->mb_activity_map;
- x->vector_range = 32;
-
x->act_zbin_adj = 0;
x->partition_info = x->pi;
@@ -1122,7 +1120,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
- (xd->qcoeff+16*16, xd->block[16].dequant,
+ (xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
return rate;
@@ -1307,7 +1305,7 @@ int vp8cx_encode_inter_macroblock
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
- (xd->qcoeff+16*16, xd->block[16].dequant,
+ (xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 4378b634e..16393a1ff 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -18,7 +18,6 @@
#include "vp8/common/invtrans.h"
#include "vp8/common/recon.h"
#include "dct.h"
-#include "vp8/common/g_common.h"
#include "encodeintra.h"
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index ef8ead588..24339a5e0 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -302,7 +302,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->mv_col_max = x->mv_col_max;
z->mv_row_min = x->mv_row_min;
z->mv_row_max = x->mv_row_max;
- z->vector_range = x->vector_range ;
*/
z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4;
@@ -350,8 +349,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->block[i].src = x->block[i].src;
*/
z->block[i].src_stride = x->block[i].src_stride;
- z->block[i].force_empty = x->block[i].force_empty;
-
}
{
@@ -387,10 +384,22 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
- for (i = 0; i < 25; i++)
- {
- zd->block[i].dequant = xd->block[i].dequant;
- }
+ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+ vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
+ vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
+ vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
+
+#if 1
+ /*TODO: Remove dequant from BLOCKD. This is a temporary solution until
+ * the quantizer code uses a passed in pointer to the dequant constants.
+ * This will also require modifications to the x86 and neon assembly.
+ * */
+ for (i = 0; i < 16; i++)
+ zd->block[i].dequant = zd->dequant_y1;
+ for (i = 16; i < 24; i++)
+ zd->block[i].dequant = zd->dequant_uv;
+ zd->block[24].dequant = zd->dequant_y2;
+#endif
}
}
@@ -421,8 +430,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
#endif
mb->gf_active_ptr = x->gf_active_ptr;
- mb->vector_range = 32;
-
vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
mbr_ei[i].totalrate = 0;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6e0254644..9223781af 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -23,7 +23,6 @@
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
-#include "vp8/common/g_common.h"
#include "vpx_scale/yv12extend.h"
#if CONFIG_POSTPROC
#include "vp8/common/postproc.h"
@@ -251,6 +250,9 @@ static void save_layer_context(VP8_COMP *cpi)
lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+ lc->starting_buffer_level_in_ms = cpi->oxcf.starting_buffer_level_in_ms;
+ lc->optimal_buffer_level_in_ms = cpi->oxcf.optimal_buffer_level_in_ms;
+ lc->maximum_buffer_size_in_ms = cpi->oxcf.maximum_buffer_size_in_ms;
lc->buffer_level = cpi->buffer_level;
lc->bits_off_target = cpi->bits_off_target;
lc->total_actual_bits = cpi->total_actual_bits;
@@ -288,6 +290,9 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer)
cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
+ cpi->oxcf.starting_buffer_level_in_ms = lc->starting_buffer_level_in_ms;
+ cpi->oxcf.optimal_buffer_level_in_ms = lc->optimal_buffer_level_in_ms;
+ cpi->oxcf.maximum_buffer_size_in_ms = lc->maximum_buffer_size_in_ms;
cpi->buffer_level = lc->buffer_level;
cpi->bits_off_target = lc->bits_off_target;
cpi->total_actual_bits = lc->total_actual_bits;
@@ -1255,6 +1260,8 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (cpi->frame_rate > 180)
cpi->frame_rate = 30;
+ cpi->ref_frame_rate = cpi->frame_rate;
+
// change includes all joint functionality
vp8_change_config(cpi, oxcf);
@@ -1290,6 +1297,10 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->output_frame_rate / cpi->oxcf.rate_decimator[i];
lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000;
+ lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
+ lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level;
+ lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size;
+
lc->starting_buffer_level =
rescale(oxcf->starting_buffer_level,
lc->target_bandwidth, 1000);
@@ -1346,6 +1357,56 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
#endif
}
+void update_layer_contexts (VP8_COMP *cpi) /* Recompute each layer's stored rate-control state from cpi->oxcf and cpi->ref_frame_rate. */
+{
+ VP8_CONFIG *oxcf = &cpi->oxcf;
+
+ /* Update snapshots of the layer contexts to reflect new parameters; nothing is done unless more than one layer is configured */
+ if (oxcf->number_of_layers > 1)
+ {
+ unsigned int i;
+ double prev_layer_frame_rate=0; /* frame rate of layer i-1; only read when i > 0 */
+
+ for (i=0; i<oxcf->number_of_layers; i++)
+ {
+ LAYER_CONTEXT *lc = &cpi->layer_context[i];
+
+ lc->frame_rate =
+ cpi->ref_frame_rate / oxcf->rate_decimator[i]; /* layer rate = reference rate divided by the layer's decimator */
+ lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; /* presumably kbit/s -> bit/s -- TODO confirm units */
+
+ lc->starting_buffer_level = rescale(
+ oxcf->starting_buffer_level_in_ms,
+ lc->target_bandwidth, 1000); /* presumably converts milliseconds to bits at this layer's bandwidth -- confirm against rescale() */
+
+ if (oxcf->optimal_buffer_level == 0) /* NOTE(review): tests optimal_buffer_level, while the else branch rescales optimal_buffer_level_in_ms -- confirm intended field */
+ lc->optimal_buffer_level = lc->target_bandwidth / 8;
+ else
+ lc->optimal_buffer_level = rescale(
+ oxcf->optimal_buffer_level_in_ms,
+ lc->target_bandwidth, 1000);
+
+ if (oxcf->maximum_buffer_size == 0) /* NOTE(review): tests maximum_buffer_size, while the else branch rescales maximum_buffer_size_in_ms -- confirm intended field */
+ lc->maximum_buffer_size = lc->target_bandwidth / 8;
+ else
+ lc->maximum_buffer_size = rescale(
+ oxcf->maximum_buffer_size_in_ms,
+ lc->target_bandwidth, 1000);
+
+ // Work out the average size of a frame within this layer: the bitrate this layer adds over layer i-1 divided by the frame rate it adds (layer 0 is left unchanged here)
+ if (i > 0)
+ lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] -
+ oxcf->target_bitrate[i-1]) * 1000 /
+ (lc->frame_rate - prev_layer_frame_rate);
+
+ lc->active_worst_quality = oxcf->worst_allowed_q; /* quality state is overwritten with the configured limits on every call */
+ lc->active_best_quality = oxcf->best_allowed_q;
+ lc->avg_frame_qindex = oxcf->worst_allowed_q; /* average q index is set to the worst allowed q, not the best */
+
+ prev_layer_frame_rate = lc->frame_rate; /* remembered for the next layer's average-frame-size computation */
+ }
+ }
+}
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
@@ -1486,9 +1547,12 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
// local file playback mode == really big buffer
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
{
- cpi->oxcf.starting_buffer_level = 60000;
- cpi->oxcf.optimal_buffer_level = 60000;
- cpi->oxcf.maximum_buffer_size = 240000;
+ cpi->oxcf.starting_buffer_level = 60000;
+ cpi->oxcf.optimal_buffer_level = 60000;
+ cpi->oxcf.maximum_buffer_size = 240000;
+ cpi->oxcf.starting_buffer_level_in_ms = 60000;
+ cpi->oxcf.optimal_buffer_level_in_ms = 60000;
+ cpi->oxcf.maximum_buffer_size_in_ms = 240000;
}
// Convert target bandwidth from Kbit/s to Bit/s
@@ -4257,14 +4321,15 @@ static void encode_frame_to_data_rate
vp8_clear_system_state(); //__asm emms;
- if (cpi->twopass.total_left_stats->coded_error != 0.0)
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+ if (cpi->twopass.total_left_stats.coded_error != 0.0)
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
"%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
"%10.3f %8d\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
(int)cpi->total_target_vs_actual,
+ cpi->buffer_level,
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
(int)cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
@@ -4275,18 +4340,19 @@ static void encode_frame_to_data_rate
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
- cpi->twopass.total_left_stats->coded_error,
+ cpi->twopass.total_left_stats.coded_error,
(double)cpi->twopass.bits_left /
- cpi->twopass.total_left_stats->coded_error,
+ cpi->twopass.total_left_stats.coded_error,
cpi->tot_recode_hits);
else
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
"%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
"%8d\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
(int)cpi->total_target_vs_actual,
+ cpi->buffer_level,
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
(int)cpi->total_actual_bits, cm->base_qindex,
cpi->active_best_quality, cpi->active_worst_quality,
@@ -4297,7 +4363,7 @@ static void encode_frame_to_data_rate
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
- cpi->twopass.total_left_stats->coded_error,
+ cpi->twopass.total_left_stats.coded_error,
cpi->tot_recode_hits);
fclose(f);
@@ -4670,13 +4736,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
return -1;
}
- // Restore layer specific context if necessary
- if (cpi->oxcf.number_of_layers > 1)
- {
- restore_layer_context (cpi,
- cpi->oxcf.layer_id[cm->current_video_frame % cpi->oxcf.periodicity]);
- }
-
if (cpi->source->ts_start < cpi->first_time_stamp_ever)
{
cpi->first_time_stamp_ever = cpi->source->ts_start;
@@ -4684,16 +4743,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
}
// adjust frame rates based on timestamps given
- if (cpi->oxcf.number_of_layers > 1 )
- {
- vp8_new_frame_rate (
- cpi, cpi->layer_context[cpi->current_layer].frame_rate);
-
- cpi->last_time_stamp_seen = cpi->source->ts_start;
- cpi->last_end_time_stamp_seen = cpi->source->ts_end;
-
- }
- else if (!cm->refresh_alt_ref_frame)
+ if (!cm->refresh_alt_ref_frame || (cpi->oxcf.number_of_layers > 1))
{
int64_t this_duration;
int step = 0;
@@ -4718,7 +4768,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (this_duration)
{
if (step)
- vp8_new_frame_rate(cpi, 10000000.0 / this_duration);
+ cpi->ref_frame_rate = 10000000.0 / this_duration;
else
{
double avg_duration, interval;
@@ -4731,18 +4781,46 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if(interval > 10000000.0)
interval = 10000000;
- avg_duration = 10000000.0 / cpi->frame_rate;
+ avg_duration = 10000000.0 / cpi->ref_frame_rate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
- vp8_new_frame_rate(cpi, 10000000.0 / avg_duration);
+ cpi->ref_frame_rate = 10000000.0 / avg_duration;
+ }
+
+ if (cpi->oxcf.number_of_layers > 1)
+ {
+ int i;
+
+ // Update frame rates for each layer
+ for (i=0; i<cpi->oxcf.number_of_layers; i++)
+ {
+ LAYER_CONTEXT *lc = &cpi->layer_context[i];
+ lc->frame_rate = cpi->ref_frame_rate /
+ cpi->oxcf.rate_decimator[i];
+ }
}
+ else
+ vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
}
cpi->last_time_stamp_seen = cpi->source->ts_start;
cpi->last_end_time_stamp_seen = cpi->source->ts_end;
}
+ if (cpi->oxcf.number_of_layers > 1)
+ {
+ int layer;
+
+ update_layer_contexts (cpi);
+
+ // Restore layer specific context & set frame rate
+ layer = cpi->oxcf.layer_id[
+ cm->current_video_frame % cpi->oxcf.periodicity];
+ restore_layer_context (cpi, layer);
+ vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
+ }
+
if (cpi->compressor_speed == 2)
{
if (cpi->oxcf.number_of_layers == 1)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 46951e3b9..35efd3a00 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -253,6 +253,9 @@ typedef struct
int starting_buffer_level;
int optimal_buffer_level;
int maximum_buffer_size;
+ int starting_buffer_level_in_ms;
+ int optimal_buffer_level_in_ms;
+ int maximum_buffer_size_in_ms;
int avg_frame_size_for_layer;
@@ -421,6 +424,7 @@ typedef struct VP8_COMP
int buffered_mode;
double frame_rate;
+ double ref_frame_rate;
int64_t buffer_level;
int bits_off_target;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 592da9dbb..405c72dbd 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -21,7 +21,6 @@
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
-#include "vp8/common/g_common.h"
#include "variance.h"
#include "mcomp.h"
#include "rdopt.h"
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 3ca8758ef..d2aa7fe72 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -436,7 +436,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
int quant_val;
int Q;
- int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+ int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
+ 44, 44};
for (Q = 0; Q < QINDEX_RANGE; Q++)
{
@@ -469,36 +470,58 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
// all the ac values = ;
- for (i = 1; i < 16; i++)
+ quant_val = vp8_ac_yquant(Q);
+ cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
+ cpi->Y1quant_shift[Q] + 1, quant_val);
+ cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.Y1dequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+ cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
+ cpi->Y2quant_shift[Q] + 1, quant_val);
+ cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+ cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+ cpi->common.Y2dequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+ cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
+ invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
+ cpi->UVquant_shift[Q] + 1, quant_val);
+ cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+ cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+ cpi->common.UVdequant[Q][1] = quant_val;
+ cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+ for (i = 2; i < 16; i++)
{
- int rc = vp8_default_zig_zag1d[i];
-
- quant_val = vp8_ac_yquant(Q);
- cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
- cpi->Y1quant_shift[Q] + rc, quant_val);
- cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.Y1dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-
- quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
- cpi->Y2quant_shift[Q] + rc, quant_val);
- cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
- cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
- cpi->common.Y2dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-
- quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
- invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
- cpi->UVquant_shift[Q] + rc, quant_val);
- cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
- cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
- cpi->common.UVdequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+ cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
+ cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
+ cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
+ cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
+ cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
+ cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
+ zbin_boost[i]) >> 7;
+
+ cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
+ cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
+ cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
+ cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
+ cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
+ cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
+ zbin_boost[i]) >> 7;
+
+ cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
+ cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
+ cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
+ cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
+ cpi->UVround[Q][i] = cpi->UVround[Q][1];
+ cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
+ zbin_boost[i]) >> 7;
}
}
}
@@ -615,6 +638,31 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
*/
if (!ok_to_skip || QIndex != x->q_index)
{
+
+ xd->dequant_y1_dc[0] = 1;
+ xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
+ xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
+ xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
+
+ for (i = 1; i < 16; i++)
+ {
+ xd->dequant_y1_dc[i] =
+ xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
+ xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
+ xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
+ }
+#if 1
+ /*TODO: Remove dequant from BLOCKD. This is a temporary solution until
+ * the quantizer code uses a passed in pointer to the dequant constants.
+ * This will also require modifications to the x86 and neon assembly.
+ * */
+ for (i = 0; i < 16; i++)
+ x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex];
+ for (i = 16; i < 24; i++)
+ x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex];
+ x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex];
+#endif
+
// Y
zbin_extra = ZBIN_EXTRA_Y;
@@ -625,7 +673,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
- x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
x->block[i].zbin_extra = (short)zbin_extra;
}
@@ -640,7 +687,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
- x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
x->block[i].zbin_extra = (short)zbin_extra;
}
@@ -653,7 +699,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
x->block[24].round = cpi->Y2round[QIndex];
- x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
x->block[24].zbin_extra = (short)zbin_extra;
@@ -663,6 +708,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
cpi->last_zbin_over_quant = cpi->zbin_over_quant;
cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;
x->last_act_zbin_adj = x->act_zbin_adj;
+
+
+
}
else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant
|| cpi->last_zbin_mode_boost != cpi->zbin_mode_boost
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 726d3c4eb..d29aa75fe 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -28,7 +28,6 @@
#include "encodemb.h"
#include "quantize.h"
#include "vp8/common/idct.h"
-#include "vp8/common/g_common.h"
#include "variance.h"
#include "mcomp.h"
#include "rdopt.h"
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index a773d4391..545e4f205 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -22,7 +22,6 @@
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
-#include "vp8/common/g_common.h"
#include "vpx_scale/yv12extend.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/swapyv12buffer.h"
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index 34cc9c3bb..e698e904c 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -80,6 +80,9 @@ sym(vp8_fast_quantize_b_ssse3):
mov rdi, [rsi + vp8_blockd_dequant]
mov rcx, [rsi + vp8_blockd_dqcoeff]
+ movdqa xmm2, xmm1 ;store y for getting eob
+ movdqa xmm3, xmm5
+
pxor xmm1, xmm0
pxor xmm5, xmm4
psubw xmm1, xmm0
@@ -88,35 +91,30 @@ sym(vp8_fast_quantize_b_ssse3):
movdqa [rax], xmm1
movdqa [rax + 16], xmm5
- movdqa xmm2, [rdi]
- movdqa xmm3, [rdi + 16]
-
- pxor xmm4, xmm4
- pmullw xmm2, xmm1
- pmullw xmm3, xmm5
-
- pcmpeqw xmm1, xmm4 ;non zero mask
- pcmpeqw xmm5, xmm4 ;non zero mask
- packsswb xmm1, xmm5
- pshufb xmm1, [GLOBAL(zz_shuf)]
+ movdqa xmm0, [rdi]
+ movdqa xmm4, [rdi + 16]
- pmovmskb edx, xmm1
+ pmullw xmm0, xmm1
+ pmullw xmm4, xmm5
+ pxor xmm1, xmm1
- xor rdi, rdi
- mov eax, -1
- xor dx, ax ;flip the bits for bsr
- bsr eax, edx
+ pcmpgtw xmm2, xmm1 ;calculate eob
+ pcmpgtw xmm3, xmm1
+ packsswb xmm2, xmm3
+ pshufb xmm2, [GLOBAL(zz_shuf)]
- movdqa [rcx], xmm2 ;store dqcoeff
- movdqa [rcx + 16], xmm3 ;store dqcoeff
+ pmovmskb edx, xmm2
+ movdqa [rcx], xmm0 ;store dqcoeff
+ movdqa [rcx + 16], xmm4 ;store dqcoeff
mov rcx, [rsi + vp8_blockd_eob]
- sub edi, edx ;check for all zeros in bit mask
- sar edi, 31 ;0 or -1
+ bsr eax, edx ;count 0
add eax, 1
- and eax, edi ;if the bit mask was all zero,
- ;then eob = 0
+
+ cmp edx, 0 ;if all 0, eob=0
+ cmove eax, edx
+
mov BYTE PTR [rcx], al ;store eob
; begin epilog
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index eb483d235..0e564320f 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -39,7 +39,6 @@ VP8_COMMON_SRCS-yes += common/entropymode.h
VP8_COMMON_SRCS-yes += common/entropymv.h
VP8_COMMON_SRCS-yes += common/extend.h
VP8_COMMON_SRCS-yes += common/findnearmv.h
-VP8_COMMON_SRCS-yes += common/g_common.h
VP8_COMMON_SRCS-yes += common/header.h
VP8_COMMON_SRCS-yes += common/idct.h
VP8_COMMON_SRCS-yes += common/invtrans.h
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 6181ee8ee..f2f376a7c 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -335,6 +335,10 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
oxcf->over_shoot_pct = cfg.rc_overshoot_pct;
+ oxcf->maximum_buffer_size_in_ms = cfg.rc_buf_sz;
+ oxcf->starting_buffer_level_in_ms = cfg.rc_buf_initial_sz;
+ oxcf->optimal_buffer_level_in_ms = cfg.rc_buf_optimal_sz;
+
oxcf->maximum_buffer_size = cfg.rc_buf_sz;
oxcf->starting_buffer_level = cfg.rc_buf_initial_sz;
oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz;
@@ -1237,7 +1241,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
/* keyframing settings (kf) */
VPX_KF_AUTO, /* g_kfmode*/
0, /* kf_min_dist */
- 9999, /* kf_max_dist */
+ 128, /* kf_max_dist */
#if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION)
1, /* g_delete_first_pass_file */
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index fbe58171c..de2714317 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -412,7 +412,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
&& (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
{
ctx->postproc_cfg.post_proc_flag =
- VP8_DEBLOCK | VP8_DEMACROBLOCK;
+ VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE;
ctx->postproc_cfg.deblocking_level = 4;
ctx->postproc_cfg.noise_level = 0;
}
@@ -700,6 +700,27 @@ static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_INVALID_PARAM;
}
+extern int vp8_references_buffer( VP8_COMMON *oci, int ref_frame );
+static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, /* Writes a bitmask of reference-frame flags through the int* passed in args; presumably the handler for VP8D_GET_LAST_REF_USED -- confirm in the ctrl map */
+ int ctrl_id, /* not read in this function */
+ va_list args)
+{
+ int *ref_info = va_arg(args, int *); /* caller-supplied output pointer */
+ VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
+ VP8_COMMON *oci = &pbi->common;
+
+ if (ref_info)
+ {
+ *ref_info = /* OR together one flag per reference frame for which vp8_references_buffer() returns non-zero */
+ (vp8_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) |
+ (vp8_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) |
+ (vp8_references_buffer( oci, LAST_FRAME )?VP8_LAST_FRAME:0);
+
+ return VPX_CODEC_OK;
+ }
+ else
+ return VPX_CODEC_INVALID_PARAM; /* NULL output pointer: nothing is written */
+}
static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
int ctrl_id,
diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c
index 41ecaa78b..65883ff80 100644
--- a/vp8_scalable_patterns.c
+++ b/vp8_scalable_patterns.c
@@ -129,27 +129,29 @@ int main(int argc, char **argv) {
int got_data;
int flags = 0;
int i;
+ int pts = 0; // PTS starts at 0
+ int frame_duration = 1; // 1 timebase tick per frame
int layering_mode = 0;
int frames_in_layer[MAX_LAYERS] = {0};
int layer_flags[MAX_PERIODICITY] = {0};
// Check usage and arguments
- if (argc < 7)
- die("Usage: %s <infile> <outfile> <width> <height> <mode> "
- "<Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
+ if (argc < 9)
+ die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
+ " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
width = strtol (argv[3], NULL, 0);
height = strtol (argv[4], NULL, 0);
if (width < 16 || width%2 || height <16 || height%2)
die ("Invalid resolution: %d x %d", width, height);
- if (!sscanf(argv[5], "%d", &layering_mode))
- die ("Invalid mode %s", argv[5]);
+ if (!sscanf(argv[7], "%d", &layering_mode))
+ die ("Invalid mode %s", argv[7]);
if (layering_mode<0 || layering_mode>6)
- die ("Invalid mode (0..6) %s", argv[5]);
+ die ("Invalid mode (0..6) %s", argv[7]);
- if (argc != 6+mode_to_num_layers[layering_mode])
+ if (argc != 8+mode_to_num_layers[layering_mode])
die ("Invalid number of arguments");
if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
@@ -168,8 +170,14 @@ int main(int argc, char **argv) {
cfg.g_w = width;
cfg.g_h = height;
- for (i=6; i<6+mode_to_num_layers[layering_mode]; i++)
- if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-6]))
+ // Timebase format e.g. 30fps: numerator=1, denominator=30
+ if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
+ die ("Invalid timebase numerator %s", argv[5]);
+ if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
+ die ("Invalid timebase denominator %s", argv[6]);
+
+ for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
+ if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
die ("Invalid data rate %s", argv[i]);
// Real time parameters
@@ -193,7 +201,7 @@ int main(int argc, char **argv) {
cfg.kf_min_dist = cfg.kf_max_dist = 1000;
// Temporal scaling parameters:
- // NOTE: The 3 prediction frames cannot be used interchangebly due to
+ // NOTE: The 3 prediction frames cannot be used interchangeably due to
// differences in the way they are handled throughout the code. The
// frames should be allocated to layers in the order LAST, GF, ARF.
// Other combinations work, but may produce slightly inferior results.
@@ -210,14 +218,15 @@ int main(int argc, char **argv) {
cfg.ts_rate_decimator[1] = 1;
memcpy(cfg.ts_layer_id, ids, sizeof(ids));
+#if 1
// 0=L, 1=GF, Intra-layer prediction enabled
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_REF_ARF;
-#if 0
- // 0=L, 1=GF, Intra-layer 1 prediction disabled
+#else
+ // 0=L, 1=GF, Intra-layer prediction disabled
layer_flags[0] = VPX_EFLAG_FORCE_KF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
@@ -275,7 +284,7 @@ int main(int argc, char **argv) {
case 3:
{
// 3-layers, 4-frame period
- int ids[6] = {0,2,1,2};
+ int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
cfg.ts_rate_decimator[0] = 4;
@@ -295,13 +304,12 @@ int main(int argc, char **argv) {
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF;
break;
- cfg.ts_rate_decimator[2] = 1;
}
case 4:
{
// 3-layers, 4-frame period
- int ids[6] = {0,2,1,2};
+ int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
cfg.ts_rate_decimator[0] = 4;
@@ -326,7 +334,7 @@ int main(int argc, char **argv) {
case 5:
{
// 3-layers, 4-frame period
- int ids[6] = {0,2,1,2};
+ int ids[4] = {0,2,1,2};
cfg.ts_number_layers = 3;
cfg.ts_periodicity = 4;
cfg.ts_rate_decimator[0] = 4;
@@ -417,7 +425,7 @@ int main(int argc, char **argv) {
flags = layer_flags[frame_cnt % cfg.ts_periodicity];
frame_avail = read_frame(infile, &raw);
- if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
+ if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts,
1, flags, VPX_DL_REALTIME))
die_codec(&codec, "Failed to encode frame");
@@ -446,6 +454,7 @@ int main(int argc, char **argv) {
fflush (stdout);
}
frame_cnt++;
+ pts += frame_duration;
}
printf ("\n");
fclose (infile);
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 983cc4ad4..eec979763 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -63,6 +63,7 @@ enum vp8_postproc_level
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
+ VP8_MFQE = 1<<10,
};
/*!\brief post process flags
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 1d9d53165..86610358c 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -55,6 +55,11 @@ enum vp8_dec_control_id
/** check if the indicated frame is corrupted */
VP8D_GET_FRAME_CORRUPTED,
+ /** control function to get info on which reference frames were used
+ * by the last decode
+ */
+ VP8D_GET_LAST_REF_USED,
+
VP8_DECODER_CTRL_ID_MAX
} ;
@@ -69,7 +74,7 @@ enum vp8_dec_control_id
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *)
VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *)
-
+VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *)
/*! @} - end defgroup vp8_decoder */
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index c7ccc0510..f2fb08954 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -96,11 +96,6 @@ typedef unsigned __int64 vpxu64;
# define PRId64 "lld"
# define VPX64 PRId64
typedef long vpxs64;
-#elif defined(__SYMBIAN32__)
-# undef PRId64
-# define PRId64 "u"
-# define VPX64 PRId64
-typedef unsigned int vpxs64;
#else
# error "64 bit integer type undefined for this platform!"
#endif
diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
deleted file mode 100644
index e495184e7..000000000
--- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm
+++ /dev/null
@@ -1,774 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |horizontal_line_4_5_scale_armv4|
- EXPORT |vertical_band_4_5_scale_armv4|
- EXPORT |horizontal_line_2_3_scale_armv4|
- EXPORT |vertical_band_2_3_scale_armv4|
- EXPORT |horizontal_line_3_5_scale_armv4|
- EXPORT |vertical_band_3_5_scale_armv4|
- EXPORT |horizontal_line_3_4_scale_armv4|
- EXPORT |vertical_band_3_4_scale_armv4|
- EXPORT |horizontal_line_1_2_scale_armv4|
- EXPORT |vertical_band_1_2_scale_armv4|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
-src RN r0
-srcw RN r1
-dest RN r2
-mask RN r12
-c51_205 RN r10
-c102_154 RN r11
-;/****************************************************************************
-; *
-; * ROUTINE : horizontal_line_4_5_scale_armv4
-; *
-; * INPUTS : const unsigned char *source : Pointer to source data.
-; * unsigned int source_width : Stride of source.
-; * unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_width : Stride of destination (NOT USED).
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Copies horizontal line of pixels from source to
-; * destination scaling up by 4 to 5.
-; *
-; * SPECIAL NOTES : None.
-; *
-; ****************************************************************************/
-;void horizontal_line_4_5_scale_armv4
-;(
-; r0 = UINT8 *source
-; r1 = UINT32 source_width
-; r2 = UINT8 *dest
-; r3 = UINT32 dest_width
-;)
-|horizontal_line_4_5_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- mov mask, #255 ; mask for selection
- ldr c51_205, =0x3300cd
- ldr c102_154, =0x66009a
-
- ldr r3, [src], #4
-
-hl45_loop
-
- and r4, r3, mask ; a = src[0]
- and r5, mask, r3, lsr #8 ; b = src[1]
- strb r4, [dest], #1
-
- orr r6, r4, r5, lsl #16 ; b | a
- and r7, mask, r3, lsr #16 ; c = src[2]
- mul r6, c51_205, r6 ; a * 51 + 205 * b
-
- orr r5, r5, r7, lsl #16 ; c | b
- mul r5, c102_154, r5 ; b * 102 + 154 * c
- add r6, r6, #0x8000
- and r8, mask, r3, lsr #24 ; d = src[3]
- mov r6, r6, lsr #24
- strb r6, [dest], #1
-
- orr r7, r8, r7, lsl #16 ; c | d
- mul r7, c102_154, r7 ; c * 154 + 102 * d
- add r5, r5, #0x8000
- ldr r3, [src], #4
- mov r5, r5, lsr #24
- strb r5, [dest], #1
-
- add r7, r7, #0x8000
- and r9, mask, r3 ; e = src[4]
- orr r9, r9, r8, lsl #16 ; d | e
- mul r9, c51_205, r9 ; d * 205 + 51 * e
- mov r7, r7, lsr #24
- strb r7, [dest], #1
-
- add r9, r9, #0x8000
- subs srcw, srcw, #4
- mov r9, r9, lsr #24
- strb r9, [dest], #1
-
- bne hl45_loop
-
- and r4, r3, mask
- and r5, mask, r3, lsl #8
- strb r4, [dest], #1
-
- orr r6, r4, r5, lsl #16 ; b | a
- mul r6, c51_205, r6
-
- and r7, mask, r3, lsl #16
- orr r5, r5, r7, lsl #16 ; c | b
- mul r5, c102_154, r5
- add r6, r6, #0x8000
- and r8, mask, r3, lsl #24
- mov r6, r6, lsr #24
- strb r6, [dest], #1
-
- orr r7, r8, r7, lsl #16 ; c | d
- mul r7, c102_154, r7
- add r5, r5, #0x8000
- mov r5, r5, lsr #24
- strb r5, [dest], #1
-
- add r7, r7, #0x8000
- mov r7, r7, lsr #24
- strb r7, [dest], #1
-
- ldrb r3, [src]
- strb r3, [dest], #1
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vp8cx_horizontal_line_4_5_scale_c|
-
-;/****************************************************************************
-; *
-; * ROUTINE : vertical_band_4_5_scale_armv4
-; *
-; * INPUTS : unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_pitch : Stride of destination data.
-; * unsigned int dest_width : Width of destination data.
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The
-; * height of the band scaled is 4-pixels.
-; *
-; * SPECIAL NOTES : The routine uses the first line of the band below
-; * the current band.
-; *
-; ****************************************************************************/
-;void vertical_band_4_5_scale_armv4
-;(
-; r0 = UINT8 *dest
-; r1 = UINT32 dest_pitch
-; r2 = UINT32 dest_width
-;)
-|vertical_band_4_5_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr c51_205, =0x3300cd
- ldr c102_154, =0x66009a
-
-vl45_loop
- mov r3, src
- ldrb r4, [r3], r1 ; a = des [0]
- ldrb r5, [r3], r1 ; b = des [dest_pitch]
- ldrb r7, [r3], r1 ; c = des[dest_pitch*2]
- add lr, src, r1
-
- orr r6, r4, r5, lsl #16 ; b | a
- mul r6, c51_205, r6 ; a * 51 + 205 * b
-
- ldrb r8, [r3], r1 ; d = des[dest_pitch*3]
- orr r5, r5, r7, lsl #16 ; c | b
- mul r5, c102_154, r5 ; b * 102 + 154 * c
- add r6, r6, #0x8000
- orr r7, r8, r7, lsl #16 ; c | d
- mov r6, r6, lsr #24
- strb r6, [lr], r1
-
- ldrb r9, [r3, r1] ; e = des [dest_pitch * 5]
- mul r7, c102_154, r7 ; c * 154 + 102 * d
- add r5, r5, #0x8000
- orr r9, r9, r8, lsl #16 ; d | e
- mov r5, r5, lsr #24
- strb r5, [lr], r1
-
- mul r9, c51_205, r9 ; d * 205 + 51 * e
- add r7, r7, #0x8000
- add src, src, #1
- mov r7, r7, lsr #24
- strb r7, [lr], r1
-
- add r9, r9, #0x8000
- subs r2, r2, #1
- mov r9, r9, lsr #24
- strb r9, [lr], r1
-
- bne vl45_loop
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vertical_band_4_5_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : horizontal_line_2_3_scale_armv4
-; *
-; * INPUTS : const unsigned char *source : Pointer to source data.
-; * unsigned int source_width : Stride of source.
-; * unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_width : Stride of destination (NOT USED).
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Copies horizontal line of pixels from source to
-; * destination scaling up by 2 to 3.
-; *
-; * SPECIAL NOTES : None.
-; *
-; *
-; ****************************************************************************/
-;void horizontal_line_2_3_scale_armv4
-;(
-; const unsigned char *source,
-; unsigned int source_width,
-; unsigned char *dest,
-; unsigned int dest_width
-;)
-|horizontal_line_2_3_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
- ldr lr, =85
- ldr r12, =171
-
-hl23_loop
-
- ldrb r3, [src], #1 ; a
- ldrb r4, [src], #1 ; b
- ldrb r5, [src] ; c
-
- strb r3, [dest], #1
- mul r4, r12, r4 ; b * 171
- mla r6, lr, r3, r4 ; a * 85
- mla r7, lr, r5, r4 ; c * 85
-
- add r6, r6, #128
- mov r6, r6, lsr #8
- strb r6, [dest], #1
-
- add r7, r7, #128
- mov r7, r7, lsr #8
- strb r7, [dest], #1
-
- subs srcw, srcw, #2
- bne hl23_loop
-
- ldrb r4, [src, #1] ; b
- strb r5, [dest], #1
- strb r4, [dest, #1]
-
- mul r4, r12, r4 ; b * 171
- mla r6, lr, r5, r4 ; a * 85 + b *171
-
- add r6, r6, #128
- mov r6, r6, lsr #8
- strb r6, [dest]
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|horizontal_line_2_3_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : vertical_band_2_3_scale_armv4
-; *
-; * INPUTS : unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_pitch : Stride of destination data.
-; * unsigned int dest_width : Width of destination data.
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The
-; * height of the band scaled is 2-pixels.
-; *
-; * SPECIAL NOTES : The routine uses the first line of the band below
-; * the current band.
-; *
-; ****************************************************************************/
-;void vertical_band_2_3_scale_armv4
-;(
-; r0 = UINT8 *dest
-; r1 = UINT32 dest_pitch
-; r2 = UINT32 dest_width
-;)
-|vertical_band_2_3_scale_armv4| PROC
- stmdb sp!, {r4 - r8, lr}
- ldr lr, =85
- ldr r12, =171
- add r3, r1, r1, lsl #1 ; 3 * dest_pitch
-
-vl23_loop
- ldrb r4, [src] ; a = des [0]
- ldrb r5, [src, r1] ; b = des [dest_pitch]
- ldrb r7, [src, r3] ; c = des [dest_pitch*3]
- subs r2, r2, #1
-
- mul r5, r12, r5 ; b * 171
- mla r6, lr, r4, r5 ; a * 85
- mla r8, lr, r7, r5 ; c * 85
-
- add r6, r6, #128
- mov r6, r6, lsr #8
- strb r6, [src, r1]
-
- add r8, r8, #128
- mov r8, r8, lsr #8
- strb r8, [src, r1, lsl #1]
-
- add src, src, #1
-
- bne vl23_loop
-
- ldmia sp!, {r4 - r8, pc}
- ENDP ;|vertical_band_2_3_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : vp8cx_horizontal_line_3_5_scale_c
-; *
-; * INPUTS : const unsigned char *source : Pointer to source data.
-; * unsigned int source_width : Stride of source.
-; * unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_width : Stride of destination (NOT USED).
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Copies horizontal line of pixels from source to
-; * destination scaling up by 3 to 5.
-; *
-; * SPECIAL NOTES : None.
-; *
-; *
-; ****************************************************************************/
-;void vp8cx_horizontal_line_3_5_scale_c
-;(
-; const unsigned char *source,
-; unsigned int source_width,
-; unsigned char *dest,
-; unsigned int dest_width
-;)
-|horizontal_line_3_5_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr c51_205, =0x3300cd
- ldr c102_154, =0x66009a
-
- ldrb r4, [src], #1 ; a = src[0]
-
-hl35_loop
-
- ldrb r8, [src], #1 ; b = src[1]
- strb r4, [dest], #1
-
- orr r6, r4, r8, lsl #16 ; b | a
- ldrb r9, [src], #1 ; c = src[2]
- mul r6, c102_154, r6 ; a * 102 + 154 * b
-
- orr r5, r9, r8, lsl #16 ; b | c
- mul r5, c51_205, r5 ; b * 205 + 51 * c
- add r6, r6, #0x8000
- ldrb r4, [src], #1 ; d = src[3]
- mov r6, r6, lsr #24
- strb r6, [dest], #1
-
- orr r7, r8, r9, lsl #16 ; c | b
- mul r7, c51_205, r7 ; c * 205 + 154 * b
- add r5, r5, #0x8000
- mov r5, r5, lsr #24
- strb r5, [dest], #1
-
- orr r9, r4, r9, lsl #16 ; c | d
- mul r9, c102_154, r9 ; c * 154 + 102 * d
- add r7, r7, #0x8000
- mov r7, r7, lsr #24
- strb r7, [dest], #1
-
- add r9, r9, #0x8000
- subs srcw, srcw, #3
- mov r9, r9, lsr #24
- strb r9, [dest], #1
-
- bpl hl35_loop
-
- ldrb r5, [src], #1 ; b = src[1]
- strb r4, [dest], #1
-
- orr r6, r4, r8, lsl #16 ; b | a
- ldrb r9, [src], #1 ; c = src[2]
- mul r6, c102_154, r6 ; a * 102 + 154 * b
-
- orr r5, r9, r8, lsl #16 ; b | c
- mul r5, c51_205, r5 ; b * 205 + 51 * c
- add r6, r6, #0x8000
- mov r6, r6, lsr #24
- strb r6, [dest], #1
-
- orr r7, r8, r9, lsl #16 ; c | b
- mul r7, c51_205, r7 ; c * 205 + 154 * b
- add r5, r5, #0x8000
- mov r5, r5, lsr #24
- strb r5, [dest], #1
-
- add r7, r7, #0x8000
- mov r7, r7, lsr #24
- strb r7, [dest], #1
- strb r9, [dest], #1
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vp8cx_horizontal_line_3_5_scale_c|
-
-
-;/****************************************************************************
-; *
-; * ROUTINE : vp8cx_vertical_band_3_5_scale_c
-; *
-; * INPUTS : unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_pitch : Stride of destination data.
-; * unsigned int dest_width : Width of destination data.
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The
-; * height of the band scaled is 3-pixels.
-; *
-; * SPECIAL NOTES : The routine uses the first line of the band below
-; * the current band.
-; *
-; ****************************************************************************/
-;void vertical_band_4_5_scale_armv4
-;(
-; r0 = UINT8 *dest
-; r1 = UINT32 dest_pitch
-; r2 = UINT32 dest_width
-;)
-|vertical_band_3_5_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr c51_205, =0x3300cd
- ldr c102_154, =0x66009a
-
-vl35_loop
- mov r3, src
- ldrb r4, [r3], r1 ; a = des [0]
- ldrb r5, [r3], r1 ; b = des [dest_pitch]
- ldrb r7, [r3], r1 ; c = des[dest_pitch*2]
- add lr, src, r1
-
- orr r8, r4, r5, lsl #16 ; b | a
- mul r6, c102_154, r8 ; a * 102 + 154 * b
-
- ldrb r8, [r3, r1, lsl #1] ; d = des[dest_pitch*5]
- orr r3, r7, r5, lsl #16 ; b | c
- mul r9, c51_205, r3 ; b * 205 + 51 * c
- add r6, r6, #0x8000
- orr r3, r5, r7, lsl #16 ; c | b
- mov r6, r6, lsr #24
- strb r6, [lr], r1
-
- mul r5, c51_205, r3 ; c * 205 + 154 * b
- add r9, r9, #0x8000
- orr r3, r8, r7, lsl #16 ; c | d
- mov r9, r9, lsr #24
- strb r9, [lr], r1
-
- mul r7, c102_154, r3 ; c * 154 + 102 * d
- add r5, r5, #0x8000
- add src, src, #1
- mov r5, r5, lsr #24
- strb r5, [lr], r1
-
- add r7, r7, #0x8000
- subs r2, r2, #1
- mov r7, r7, lsr #24
- strb r7, [lr], r1
-
-
- bne vl35_loop
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vertical_band_3_5_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : horizontal_line_3_4_scale_armv4
-; *
-; * INPUTS : const unsigned char *source : Pointer to source data.
-; * unsigned int source_width : Stride of source.
-; * unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_width : Stride of destination (NOT USED).
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Copies horizontal line of pixels from source to
-; * destination scaling up by 3 to 4.
-; *
-; * SPECIAL NOTES : None.
-; *
-; *
-; ****************************************************************************/
-;void horizontal_line_3_4_scale_armv4
-;(
-; const unsigned char *source,
-; unsigned int source_width,
-; unsigned char *dest,
-; unsigned int dest_width
-;)
-|horizontal_line_3_4_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr r10, =64
- ldr r11, =192
- mov r9, #128
-
- ldrb r4, [src], #1 ; a = src[0]
-
-hl34_loop
-
- ldrb r8, [src], #1 ; b = src[1]
- ldrb r7, [src], #1 ; c = src[2]
- strb r4, [dest], #1
-
- mla r4, r10, r4, r9 ; a*64 + 128
- mla r4, r11, r8, r4 ; a*64 + b*192 + 1
-
- add r8, r8, #1 ; b + 1
- add r8, r8, r7 ; b + c + 1
- mov r8, r8, asr #1 ; (b + c + 1) >> 1
-
- mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8
- strb r4, [dest], #1
-
- strb r8, [dest], #1
-
- ldrb r4, [src], #1 ; [a+1]
-
- mla r7, r11, r7, r9 ; c*192 + 128
- mla r7, r4, r10, r7 ; a*64 + b*192 + 128
-
- subs srcw, srcw, #3
-
- mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8
- strb r7, [dest], #1
-
- bpl hl34_loop
-
- ldrb r8, [src], #1 ; b = src[1]
- ldrb r7, [src], #1 ; c = src[2]
- strb r4, [dest], #1
-
- mla r4, r10, r4, r9 ; a*64 + 128
- mla r4, r11, r8, r4 ; a*64 + b*192 + 1
- mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8
- strb r4, [dest], #1
-
- add r8, r8, #1 ; b + 1
- add r8, r8, r7 ; b + c + 1
- mov r8, r8, asr #1 ; (b + c + 1) >> 1
- strb r8, [dest], #1
- strb r7, [dest], #1
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vp8cx_horizontal_line_3_4_scale_c|
-
-
-;/****************************************************************************
-; *
-; * ROUTINE : vertical_band_3_4_scale_armv4
-; *
-; * INPUTS : unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_pitch : Stride of destination data.
-; * unsigned int dest_width : Width of destination data.
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The
-; * height of the band scaled is 3-pixels.
-; *
-; * SPECIAL NOTES : The routine uses the first line of the band below
-; * the current band.
-; *
-; ****************************************************************************/
-;void vertical_band_3_4_scale_armv4
-;(
-; r0 = UINT8 *dest
-; r1 = UINT32 dest_pitch
-; r2 = UINT32 dest_width
-;)
-|vertical_band_3_4_scale_armv4| PROC
- stmdb sp!, {r4 - r11, lr}
-
- ldr r10, =64
- ldr r11, =192
- mov r9, #128
-
-; ldr r1,[r1]
-vl34_loop
- mov r3, src
- ldrb r4, [r3], r1 ; a = des [0]
- ldrb r5, [r3], r1 ; b = des [dest_pitch]
- ldrb r7, [r3], r1 ; c = des [dest_pitch*2]
- add lr, src, r1
-
- mla r4, r10, r4, r9 ; a*64 + 128
- mla r4, r11, r5, r4 ; a*64 + b*192 + 1
-
- add r5, r5, #1 ; b + 1
- add r5, r5, r7 ; b + c + 1
- mov r5, r5, asr #1 ; (b + c + 1) >> 1
-
- mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8
- strb r4, [lr], r1
-
- ldrb r4, [r3, r1] ; a = des [dest_pitch*4]
-
- strb r5, [lr], r1
-
- mla r7, r11, r7, r9 ; c*192 + 128
- mla r7, r4, r10, r7 ; a*64 + b*192 + 128
- mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8
-
- add src, src, #1
- subs r2, r2, #1
-
- strb r7, [lr]
-
- bne vl34_loop
-
- ldmia sp!, {r4 - r11, pc}
- ENDP ;|vertical_band_3_4_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : vp8cx_horizontal_line_1_2_scale_c
-; *
-; * INPUTS : const unsigned char *source : Pointer to source data.
-; * unsigned int source_width : Stride of source.
-; * unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_width : Stride of destination (NOT USED).
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Copies horizontal line of pixels from source to
-; * destination scaling up by 1 to 2.
-; *
-; * SPECIAL NOTES : None.
-; *
-; ****************************************************************************/
-;void vp8cx_horizontal_line_1_2_scale_c
-;(
-; const unsigned char *source,
-; unsigned int source_width,
-; unsigned char *dest,
-; unsigned int dest_width
-;)
-|horizontal_line_1_2_scale_armv4| PROC
- stmdb sp!, {r4 - r5, lr}
-
- sub srcw, srcw, #1
-
- ldrb r3, [src], #1
- ldrb r4, [src], #1
-hl12_loop
- subs srcw, srcw, #1
-
- add r5, r3, r4
- add r5, r5, #1
- mov r5, r5, lsr #1
-
- orr r5, r3, r5, lsl #8
- strh r5, [dest], #2
-
- mov r3, r4
-
- ldrneb r4, [src], #1
- bne hl12_loop
-
- orr r5, r4, r4, lsl #8
- strh r5, [dest]
-
- ldmia sp!, {r4 - r5, pc}
- ENDP ;|vertical_band_3_5_scale_armv4|
-
-;/****************************************************************************
-; *
-; * ROUTINE : vp8cx_vertical_band_1_2_scale_c
-; *
-; * INPUTS : unsigned char *dest : Pointer to destination data.
-; * unsigned int dest_pitch : Stride of destination data.
-; * unsigned int dest_width : Width of destination data.
-; *
-; * OUTPUTS : None.
-; *
-; * RETURNS : void
-; *
-; * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The
-; * height of the band scaled is 1-pixel.
-; *
-; * SPECIAL NOTES : The routine uses the first line of the band below
-; * the current band.
-; *
-; ****************************************************************************/
-;void vp8cx_vertical_band_1_2_scale_c
-;(
-; r0 = UINT8 *dest
-; r1 = UINT32 dest_pitch
-; r2 = UINT32 dest_width
-;)
-|vertical_band_1_2_scale_armv4| PROC
- stmdb sp!, {r4 - r7, lr}
-
- ldr mask, =0xff00ff ; mask for selection
- ldr lr, = 0x010001
-
-vl12_loop
- mov r3, src
- ldr r4, [r3], r1
- ldr r5, [r3, r1]
-
- add src, src, #4
- subs r2, r2, #4
-
- and r6, r4, mask
- and r7, r5, mask
-
- add r6, r7, r6
- add r6, r6, lr
-
- and r4, mask, r4, lsr #8
- and r5, mask, r5, lsr #8
-
- mov r6, r6, lsr #1
- and r6, r6, mask
-
- add r4, r5, r4
- add r4, r4, lr
-
- mov r4, r4, lsr #1
- and r4, r4, mask
-
- orr r5, r6, r4, lsl #8
-
- str r5, [r3]
-
- bpl vl12_loop
-
- ldmia sp!, {r4 - r7, pc}
- ENDP ;|vertical_band_3_5_scale_armv4|
-
- END
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 13c9122f0..29b130876 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -27,7 +27,6 @@
/****************************************************************************
* Exports
****************************************************************************/
-#ifndef VPX_NO_GLOBALS
void (*vp8_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0;
void (*vp8_last_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0;
void (*vp8_vertical_band_2_3_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0;
@@ -51,9 +50,6 @@ void (*vp8_vertical_band_2_1_scale_i)(unsigned char *source, unsigned int src_pi
void (*vp8_horizontal_line_2_1_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0;
void (*vp8_horizontal_line_5_3_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0;
void (*vp8_horizontal_line_5_4_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0;
-#else
-# include "vpxscale_nofp.h"
-#endif
typedef struct
{
diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h
deleted file mode 100644
index 3e1a9fa83..000000000
--- a/vpx_scale/include/arm/vpxscale_nofp.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_3_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-
-void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-#define vp8_vertical_band_4_5_scale vertical_band_4_5_scale_armv4
-#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c
-#define vp8_vertical_band_2_3_scale vertical_band_2_3_scale_armv4
-#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c
-#define vp8_vertical_band_3_5_scale vertical_band_3_5_scale_armv4
-#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c
-#define vp8_vertical_band_3_4_scale vertical_band_3_4_scale_armv4
-#define vp8_last_vertical_band_3_4_scale vp8cx_last_vertical_band_3_4_scale_c
-#define vp8_horizontal_line_1_2_scale horizontal_line_1_2_scale_armv4
-#define vp8_horizontal_line_3_5_scale horizontal_line_3_5_scale_armv4
-#define vp8_horizontal_line_3_4_scale horizontal_line_3_4_scale_armv4
-#define vp8_horizontal_line_4_5_scale horizontal_line_4_5_scale_armv4
-#define vp8_horizontal_line_2_3_scale horizontal_line_2_3_scale_armv4
-#define vp8_vertical_band_1_2_scale vertical_band_1_2_scale_armv4
-#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c
-#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c
-#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c
-#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c
-#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c
-#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c
-#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c
-#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c
diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h
deleted file mode 100644
index 7b8205a1b..000000000
--- a/vpx_scale/include/generic/vpxscale_nofp.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-
-#define vp8_vertical_band_4_5_scale vp8cx_vertical_band_4_5_scale_c
-#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c
-#define vp8_vertical_band_2_3_scale vp8cx_vertical_band_2_3_scale_c
-#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c
-#define vp8_vertical_band_3_5_scale vp8cx_vertical_band_3_5_scale_c
-#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c
-#define vp8_horizontal_line_1_2_scale vp8cx_horizontal_line_1_2_scale_c
-#define vp8_horizontal_line_3_5_scale vp8cx_horizontal_line_3_5_scale_c
-#define vp8_horizontal_line_4_5_scale vp8cx_horizontal_line_4_5_scale_c
-#define vp8_horizontal_line_2_3_scale vp8cx_horizontal_line_2_3_scale_c
-#define vp8_vertical_band_1_2_scale vp8cx_vertical_band_1_2_scale_c
-#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c
-#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c
-#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c
-#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c
-#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c
-#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c
-#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c
-#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c
diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h
deleted file mode 100644
index 3e1a9fa83..000000000
--- a/vpx_scale/include/symbian/vpxscale_nofp.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_3_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-
-void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-#define vp8_vertical_band_4_5_scale vertical_band_4_5_scale_armv4
-#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c
-#define vp8_vertical_band_2_3_scale vertical_band_2_3_scale_armv4
-#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c
-#define vp8_vertical_band_3_5_scale vertical_band_3_5_scale_armv4
-#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c
-#define vp8_vertical_band_3_4_scale vertical_band_3_4_scale_armv4
-#define vp8_last_vertical_band_3_4_scale vp8cx_last_vertical_band_3_4_scale_c
-#define vp8_horizontal_line_1_2_scale horizontal_line_1_2_scale_armv4
-#define vp8_horizontal_line_3_5_scale horizontal_line_3_5_scale_armv4
-#define vp8_horizontal_line_3_4_scale horizontal_line_3_4_scale_armv4
-#define vp8_horizontal_line_4_5_scale horizontal_line_4_5_scale_armv4
-#define vp8_horizontal_line_2_3_scale horizontal_line_2_3_scale_armv4
-#define vp8_vertical_band_1_2_scale vertical_band_1_2_scale_armv4
-#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c
-#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c
-#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c
-#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c
-#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c
-#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c
-#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c
-#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c
diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h
deleted file mode 100644
index a704bd92c..000000000
--- a/vpx_scale/include/vpxscale_nofp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#if defined(__S60_V20__) || defined(__SYMBIAN32__) && !defined(__WINS__)
-#include "symbian\vpxscale_nofp.h"
-#else
-#include "generic\vpxscale_nofp.h"
-#endif
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index a13a65f57..1a4997c55 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -61,19 +61,6 @@ extern void (*vp8_horizontal_line_2_1_scale)(const unsigned char *source, unsign
extern void (*vp8_horizontal_line_5_3_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
extern void (*vp8_horizontal_line_5_4_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
-void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
-
-
-extern void dmachine_specific_config(int mmx_enabled, int xmm_enabled, int wmt_enabled);
extern void vp8_yv12_scale_or_center
(
YV12_BUFFER_CONFIG *src_yuv_config,
diff --git a/vpxdec.c b/vpxdec.c
index 7401101f8..4482f3dc7 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -124,11 +124,13 @@ static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
"Display only selected block modes");
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
"Draw only selected motion vectors");
+static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
+ "Enable multiframe quality enhancement");
static const arg_def_t *vp8_pp_args[] =
{
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
- &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
+ &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe,
NULL
};
#endif
@@ -803,6 +805,11 @@ int main(int argc, const char **argv_)
postproc = 1;
vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
}
+ else if (arg_match(&arg, &mfqe, argi))
+ {
+ postproc = 1;
+ vp8_pp_cfg.post_proc_flag |= VP8_MFQE;
+ }
else if (arg_match(&arg, &pp_debug_info, argi))
{
unsigned int level = arg_parse_uint(&arg);