diff options
56 files changed, 1323 insertions, 1406 deletions
@@ -44,15 +44,9 @@ COMPILING THE APPLICATIONS/LIBRARIES: armv5te-linux-rvct armv5te-linux-gcc - armv5te-symbian-gcc armv6-darwin-gcc armv6-linux-rvct armv6-linux-gcc - armv6-symbian-gcc - iwmmxt-linux-rvct - iwmmxt-linux-gcc - iwmmxt2-linux-rvct - iwmmxt2-linux-gcc armv7-linux-rvct armv7-linux-gcc mips32-linux-gcc diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl index cea967f93..c55ed0fe4 100755 --- a/build/make/ads2gas.pl +++ b/build/make/ads2gas.pl @@ -129,11 +129,14 @@ while (<STDIN>) # ARM code s/\sARM/.arm/g; + # eabi_attributes numerical equivalents can be found in the + # "ARM IHI 0045C" document. + # REQUIRE8 Stack is required to be 8-byte aligned - s/\sREQUIRE8/.eabi_attribute Tag_ABI_align_needed, 1/g; + s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g; # PRESERVE8 Stack 8-byte align is preserved - s/\sPRESERVE8/.eabi_attribute Tag_ABI_align_preserved, 1/g; + s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g; # Use PROC and ENDP to give the symbols a .size directive. # This makes them show up properly in debugging tools like gdb and valgrind. diff --git a/build/make/configure.sh b/build/make/configure.sh index 0426f9220..6039a5066 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -603,8 +603,8 @@ process_common_toolchain() { # Enable the architecture family case ${tgt_isa} in - arm*|iwmmxt*) enable arm;; - mips*) enable mips;; + arm*) enable arm;; + mips*) enable mips;; esac # PIC is probably what we want when building shared libs @@ -665,37 +665,25 @@ process_common_toolchain() { # Process ARM architecture variants case ${toolchain} in - arm*|iwmmxt*) - # on arm, isa versions are supersets - enabled armv7a && soft_enable armv7 ### DEBUG - enabled armv7 && soft_enable armv6 - enabled armv7 || enabled armv6 && soft_enable armv5te - enabled armv7 || enabled armv6 && soft_enable fast_unaligned - enabled iwmmxt2 && soft_enable iwmmxt - enabled iwmmxt && soft_enable armv5te + arm*) + # on arm, isa versions are supersets + enabled armv7a && soft_enable armv7 ### DEBUG + enabled armv7 && soft_enable armv6 + enabled armv7 || enabled armv6 && soft_enable armv5te + enabled armv7 || enabled armv6 && soft_enable fast_unaligned - asm_conversion_cmd="cat" + asm_conversion_cmd="cat" case ${tgt_cc} in gcc) - if enabled iwmmxt || enabled iwmmxt2 - then - CROSS=${CROSS:-arm-iwmmxt-linux-gnueabi-} - elif enabled symbian; then - CROSS=${CROSS:-arm-none-symbianelf-} - else - CROSS=${CROSS:-arm-none-linux-gnueabi-} - fi + CROSS=${CROSS:-arm-none-linux-gnueabi-} link_with_cc=gcc setup_gnu_toolchain arch_int=${tgt_isa##armv} arch_int=${arch_int%%te} check_add_asflags --defsym ARCHITECTURE=${arch_int} tune_cflags="-mtune=" - if enabled iwmmxt || enabled iwmmxt2 - then - check_add_asflags -mcpu=${tgt_isa} - elif enabled armv7 + if enabled armv7 then check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-ftree-vectorize check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a @@ -802,19 +790,6 @@ process_common_toolchain() { fi ;; - symbian*) - enable symbian - # Add the paths for the alternate libc - for d in include/libc; do - try_dir="${alt_libc}/${d}" - [ -d "${try_dir}" ] && add_cflags -I"${try_dir}" - done - for d in release/armv5/urel; do - try_dir="${alt_libc}/${d}" - [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}" - done - add_cflags -DIMPORT_C= - esac ;; mips*) @@ -83,16 +83,10 @@ EOF all_platforms="${all_platforms} armv5te-linux-rvct" all_platforms="${all_platforms} armv5te-linux-gcc" all_platforms="${all_platforms} armv5te-none-rvct" -all_platforms="${all_platforms} armv5te-symbian-gcc" all_platforms="${all_platforms} armv6-darwin-gcc" all_platforms="${all_platforms} armv6-linux-rvct" all_platforms="${all_platforms} armv6-linux-gcc" all_platforms="${all_platforms} armv6-none-rvct" -all_platforms="${all_platforms} armv6-symbian-gcc" -all_platforms="${all_platforms} iwmmxt-linux-rvct" -all_platforms="${all_platforms} iwmmxt-linux-gcc" -all_platforms="${all_platforms} iwmmxt2-linux-rvct" -all_platforms="${all_platforms} iwmmxt2-linux-gcc" all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8 @@ -198,8 +192,6 @@ ARCH_EXT_LIST=" armv5te armv6 armv7 - iwmmxt - iwmmxt2 mips32 diff --git a/examples/postproc.txt b/examples/postproc.txt index 0940ea24c..51b251a04 100644 --- a/examples/postproc.txt +++ b/examples/postproc.txt @@ -58,7 +58,7 @@ if(frame_cnt%30 == 1) { if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) die_codec(&codec, "Failed to turn off postproc"); } else if(frame_cnt%30 == 16) { - vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK, 4, 0}; + vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0}; if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) die_codec(&codec, "Failed to turn on postproc"); diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 97a3559a4..b606aaca0 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -43,6 +43,8 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); + if (oci->post_proc_buffer_int_used) + vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); vpx_free(oci->above_context); vpx_free(oci->mip); @@ -101,6 +103,8 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) return 1; } + oci->post_proc_buffer_int_used = 0; + oci->mb_rows = height >> 4; oci->mb_cols = width >> 4; oci->MBs = oci->mb_rows * oci->mb_cols; diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c index 1e2467411..89a2be825 100644 --- a/vp8/common/arm/arm_systemdependent.c +++ b/vp8/common/arm/arm_systemdependent.c @@ -11,7 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/arm.h" -#include "vp8/common/g_common.h" #include "vp8/common/pragmas.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c index 20a8ac4fc..7cf4bf943 100644 --- a/vp8/common/arm/dequantize_arm.c +++ b/vp8/common/arm/dequantize_arm.c @@ -23,22 +23,20 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); #if HAVE_ARMV7 -void vp8_dequantize_b_neon(BLOCKD *d) +void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; vp8_dequantize_b_loop_neon(Q, DQC, DQ); } #endif #if HAVE_ARMV6 -void vp8_dequantize_b_v6(BLOCKD *d) +void vp8_dequantize_b_v6(BLOCKD *d, short *DQC) { short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; vp8_dequantize_b_loop_v6(Q, DQC, DQ); } diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 99b731c78..b237206e6 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -184,7 +184,6 @@ typedef struct short *qcoeff; short *dqcoeff; unsigned char *predictor; - short *diff; short *dequant; /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ @@ -203,12 +202,16 @@ typedef struct typedef struct MacroBlockD { - DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */ DECLARE_ALIGNED(16, unsigned char, predictor[384]); DECLARE_ALIGNED(16, short, qcoeff[400]); DECLARE_ALIGNED(16, short, dqcoeff[400]); DECLARE_ALIGNED(16, char, eobs[25]); + DECLARE_ALIGNED(16, short, dequant_y1[16]); + DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); + DECLARE_ALIGNED(16, short, dequant_y2[16]); + DECLARE_ALIGNED(16, short, dequant_uv[16]); + /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; int fullpixel_mask; diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c index 4a48a3192..96245162f 100644 --- a/vp8/common/dequantize.c +++ b/vp8/common/dequantize.c @@ -14,12 +14,11 @@ #include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" -void vp8_dequantize_b_c(BLOCKD *d) +void vp8_dequantize_b_c(BLOCKD *d, short *DQC) { int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; for (i = 0; i < 16; i++) { diff --git a/vp8/common/dequantize.h b/vp8/common/dequantize.h index f66cf2bac..429359190 100644 --- a/vp8/common/dequantize.h +++ b/vp8/common/dequantize.h @@ -14,7 +14,7 @@ #include "vp8/common/blockd.h" #define prototype_dequant_block(sym) \ - void sym(BLOCKD *x) + void sym(BLOCKD *x, short *DQC) #define prototype_dequant_idct_add(sym) \ void sym(short *input, short *dq, \ diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h deleted file mode 100644 index 5f523980b..000000000 --- a/vp8/common/g_common.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -extern void (*vp8_clear_system_state)(void); -extern void (*vp8_plane_add_noise)(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int DPitch, int q); -extern void (*de_interlace) -( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int Width, - int Height, - int Stride -); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index dbf8d6504..01d76206d 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -10,7 +10,6 @@ #include "vpx_config.h" -#include "vp8/common/g_common.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" #include "vp8/common/recon.h" diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 7eec58e26..f49e2e577 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -17,6 +17,10 @@ #include "blockd.h" #include "onyxc_int.h" +#if CONFIG_MULTITHREAD +#include "vpx_mem/vpx_mem.h" +#endif + static void eob_adjust(char *eobs, short *diff) { /* eob adjust.... the idct can only skip if both the dc and eob are zero */ @@ -32,9 +36,7 @@ static void eob_adjust(char *eobs, short *diff) static void vp8_inverse_transform_mby(MACROBLOCKD *xd, const VP8_COMMON_RTCD *rtcd) { - short *DQC = xd->block[0].dequant; - /* save the dc dequant constant in case it is overridden */ - short dc_dequant_temp = DQC[0]; + short *DQC = xd->dequant_y1; if (xd->mode_info_context->mbmi.mode != SPLITMV) { @@ -51,15 +53,11 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd, } eob_adjust(xd->eobs, xd->qcoeff); - /* override the dc dequant constant */ - DQC[0] = 1; + DQC = xd->dequant_y1_dc; } DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, + (xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - - /* restore the dc dequant constant */ - DQC[0] = dc_dequant_temp; } #endif diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c index 11fa3ffa7..f8971d754 100644 --- a/vp8/common/mbpitch.c +++ b/vp8/common/mbpitch.c @@ -87,7 +87,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 4; c++) { - x->block[r*4+c].diff = &x->diff[r * 4 * 16 + c * 4]; x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4; } } @@ -96,7 +95,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 2; c++) { - x->block[16+r*2+c].diff = &x->diff[256 + r * 4 * 8 + c * 4]; x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4; } @@ -106,14 +104,11 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 2; c++) { - x->block[20+r*2+c].diff = &x->diff[320+ r * 4 * 8 + c * 4]; x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4; } } - x->block[24].diff = &x->diff[384]; - for (r = 0; r < 25; r++) { x->block[r].qcoeff = x->qcoeff + r * 16; diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index d17a32b82..eb7d5458d 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -147,10 +147,14 @@ extern "C" int over_shoot_pct; // buffering parameters - int64_t starting_buffer_level; // in seconds + int64_t starting_buffer_level; // in bytes int64_t optimal_buffer_level; int64_t maximum_buffer_size; + int64_t starting_buffer_level_in_ms; // in milli-seconds + int64_t optimal_buffer_level_in_ms; + int64_t maximum_buffer_size_in_ms; + // controlling quality int fixed_q; int worst_allowed_q; diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index f733ff774..f91383de8 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -93,9 +93,9 @@ typedef struct VP8Common { struct vpx_internal_error_info error; - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); int Width; int Height; @@ -114,6 +114,8 @@ typedef struct VP8Common YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; + YV12_BUFFER_CONFIG post_proc_buffer_int; + int post_proc_buffer_int_used; FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */ FRAME_TYPE frame_type; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index ace4c113c..cb81cb52a 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -12,9 +12,12 @@ #include "vpx_config.h" #include "vpx_scale/yv12config.h" #include "postproc.h" +#include "common.h" +#include "recon.h" #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" #include "systemdependent.h" +#include "../encoder/variance.h" #include <math.h> #include <stdlib.h> @@ -26,6 +29,7 @@ ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128) /* global constants */ +#define MFQE_PRECISION 4 #if CONFIG_POSTPROC_VISUALIZER static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { @@ -121,7 +125,6 @@ const short vp8_rv[] = 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, }; - extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch); extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); /*********************************************************************************************************** @@ -323,11 +326,11 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, } void vp8_deblock(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag, - vp8_postproc_rtcd_vtable_t *rtcd) + YV12_BUFFER_CONFIG *post, + int q, + int low_var_thresh, + int flag, + vp8_postproc_rtcd_vtable_t *rtcd) { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); @@ -671,6 +674,128 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei } } +int vp8_references_buffer( VP8_COMMON *oci, int ref_frame ) +{ + const MODE_INFO *mi = oci->mi; + int mb_row, mb_col; + + for (mb_row = 0; mb_row < oci->mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < oci->mb_cols; mb_col++,mi++) + { + if( mi->mbmi.ref_frame == ref_frame) + return 1; + } + mi++; + } + return 0; + +} + +static void multiframe_quality_enhance_block +( + int blksize, /* Currently only values supported are 16, 8, 4 */ + int qcurr, + int qprev, + unsigned char *y, + unsigned char *u, + unsigned char *v, + int y_stride, + int uv_stride, + unsigned char *yd, + unsigned char *ud, + unsigned char *vd, + int yd_stride, + int uvd_stride +) +{ + static const unsigned char VP8_ZEROS[16]= + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + }; + int blksizeby2 = blksize >> 1; + int blksizesq = blksize * blksize; + + int i, j; + unsigned char *yp; + unsigned char *ydp; + unsigned char *up; + unsigned char *udp; + unsigned char *vp; + unsigned char *vdp; + + unsigned int act, sse, sad, thr; + if (blksize == 16) + { + act = vp8_variance_var16x16(y, y_stride, VP8_ZEROS, 0, &sse); + sad = vp8_variance_sad16x16(y, y_stride, yd, yd_stride, 0); + } + else if (blksize == 8) + { + act = vp8_variance_var8x8(y, y_stride, VP8_ZEROS, 0, &sse); + sad = vp8_variance_sad8x8(y, y_stride, yd, yd_stride, 0); + } + else + { + act = vp8_variance_var4x4(y, y_stride, VP8_ZEROS, 0, &sse); + sad = vp8_variance_sad4x4(y, y_stride, yd, yd_stride, 0); + } + + thr = 6 * blksizesq + (act >> 3); + if (thr > 12 * blksizesq) thr = 12 * blksizesq; + // These thresholds should be adapted later based on qcurr and qprev + if (sad < thr) + { + static const int roundoff = (1 << (MFQE_PRECISION - 1)); + int ifactor = (sad << MFQE_PRECISION) / thr; + // TODO: SIMD optimize this section + if (ifactor) + { + int icfactor = (1 << MFQE_PRECISION) - ifactor; + for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride) + { + for (j = 0; j < blksize; ++j) + ydp[j] = (int)((yp[j] * ifactor + ydp[j] * icfactor + roundoff) >> MFQE_PRECISION); + } + for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) + { + for (j = 0; j < blksizeby2; ++j) + udp[j] = (int)((up[j] * ifactor + udp[j] * icfactor + roundoff) >> MFQE_PRECISION); + } + for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) + { + for (j = 0; j < blksizeby2; ++j) + vdp[j] = (int)((vp[j] * ifactor + vdp[j] * icfactor + roundoff) >> MFQE_PRECISION); + } + } + } + else + { + if (blksize == 16) + { + vp8_recon_copy16x16(y, y_stride, yd, yd_stride); + vp8_recon_copy8x8(u, uv_stride, ud, uvd_stride); + vp8_recon_copy8x8(v, uv_stride, vd, uvd_stride); + } + else if (blksize == 8) + { + vp8_recon_copy8x8(y, y_stride, yd, yd_stride); + for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) + vpx_memcpy(udp, up, blksizeby2); + for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) + vpx_memcpy(vdp, vp, blksizeby2); + } + else + { + for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride) + vpx_memcpy(ydp, yp, blksize); + for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) + vpx_memcpy(udp, up, blksizeby2); + for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) + vpx_memcpy(vdp, vp, blksizeby2); + } + } +} #if CONFIG_RUNTIME_CPU_DETECT #define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc) @@ -678,6 +803,104 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei #define RTCD_VTABLE(oci) NULL #endif +void vp8_multiframe_quality_enhance +( + VP8_COMMON *cm +) +{ + YV12_BUFFER_CONFIG *show = cm->frame_to_show; + YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; + + FRAME_TYPE frame_type = cm->frame_type; + /* Point at base of Mb MODE_INFO list has motion vectors etc */ + const MODE_INFO *mode_info_context = cm->mi; + int mb_row; + int mb_col; + int qcurr = cm->base_qindex; + int qprev = cm->postproc_state.last_base_qindex; + + unsigned char *y_ptr, *u_ptr, *v_ptr; + unsigned char *yd_ptr, *ud_ptr, *vd_ptr; + + /* Set up the buffer pointers */ + y_ptr = show->y_buffer; + u_ptr = show->u_buffer; + v_ptr = show->v_buffer; + yd_ptr = dest->y_buffer; + ud_ptr = dest->u_buffer; + vd_ptr = dest->v_buffer; + + /* postprocess each macro block */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + /* if motion is high there will likely be no benefit */ + if (((frame_type == INTER_FRAME && + abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 && + abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) || + (frame_type == KEY_FRAME)) && + mode_info_context->mbmi.mode != B_PRED) + { + multiframe_quality_enhance_block(16, + qcurr, + qprev, + y_ptr, + u_ptr, + v_ptr, + show->y_stride, + show->uv_stride, + yd_ptr, + ud_ptr, + vd_ptr, + dest->y_stride, + dest->uv_stride); + } + else if (mode_info_context->mbmi.mode == B_PRED) + { + int i, j; + for (i=0; i<2; ++i) + for (j=0; j<2; ++j) + multiframe_quality_enhance_block(8, + qcurr, + qprev, + y_ptr + 8*(i*show->y_stride+j), + u_ptr + 4*(i*show->uv_stride+j), + v_ptr + 4*(i*show->uv_stride+j), + show->y_stride, + show->uv_stride, + yd_ptr + 8*(i*dest->y_stride+j), + ud_ptr + 4*(i*dest->uv_stride+j), + vd_ptr + 4*(i*dest->uv_stride+j), + dest->y_stride, + dest->uv_stride); + } + else + { + vp8_recon_copy16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride); + vp8_recon_copy8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride); + vp8_recon_copy8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride); + } + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + yd_ptr += 16; + ud_ptr += 8; + vd_ptr += 8; + mode_info_context++; /* step to next MB */ + } + + y_ptr += show->y_stride * 16 - 16 * cm->mb_cols; + u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; + v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; + yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols; + ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; + vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; + + mode_info_context++; /* Skip border mb */ + } +} + int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { int q = oci->filter_level * 10 / 6; @@ -699,27 +922,65 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t dest->y_width = oci->Width; dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; + oci->postproc_state.last_base_qindex = oci->base_qindex; return 0; + } + /* Allocate post_proc_buffer_int if needed */ + if ((flags & VP8D_MFQE) && !oci->post_proc_buffer_int_used) + { + if ((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) + { + if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int, oci->Width, oci->Height, VP8BORDERINPIXELS) >= 0) + { + oci->post_proc_buffer_int_used = 1; + } + } } #if ARCH_X86||ARCH_X86_64 vpx_reset_mmx_state(); #endif - if (flags & VP8D_DEMACROBLOCK) + if ((flags & VP8D_MFQE) && + oci->current_video_frame >= 2 && + oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) + { + vp8_multiframe_quality_enhance(oci); + if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && + oci->post_proc_buffer_int_used) + { + vp8_yv12_copy_frame_ptr(&oci->post_proc_buffer, &oci->post_proc_buffer_int); + if (flags & VP8D_DEMACROBLOCK) + { + vp8_deblock_and_de_macro_block(&oci->post_proc_buffer_int, &oci->post_proc_buffer, + q + (deblock_level - 5) * 10, 1, 0, RTCD_VTABLE(oci)); + } + else if (flags & VP8D_DEBLOCK) + { + vp8_deblock(&oci->post_proc_buffer_int, &oci->post_proc_buffer, + q, 1, 0, RTCD_VTABLE(oci)); + } + } + /* Move partially towards the base q of the previous frame */ + oci->postproc_state.last_base_qindex = (3*oci->postproc_state.last_base_qindex + oci->base_qindex)>>2; + } + else if (flags & VP8D_DEMACROBLOCK) { vp8_deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer, q + (deblock_level - 5) * 10, 1, 0, RTCD_VTABLE(oci)); + oci->postproc_state.last_base_qindex = oci->base_qindex; } else if (flags & VP8D_DEBLOCK) { vp8_deblock(oci->frame_to_show, &oci->post_proc_buffer, q, 1, 0, RTCD_VTABLE(oci)); + oci->postproc_state.last_base_qindex = oci->base_qindex; } else { vp8_yv12_copy_frame_ptr(oci->frame_to_show, &oci->post_proc_buffer); + oci->postproc_state.last_base_qindex = oci->base_qindex; } if (flags & VP8D_ADDNOISE) diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h index c641b9ca5..d5aaf6216 100644 --- a/vp8/common/postproc.h +++ b/vp8/common/postproc.h @@ -104,6 +104,7 @@ struct postproc_state int last_q; int last_noise; char noise[3072]; + int last_base_qindex; DECLARE_ALIGNED(16, char, blackclamp[16]); DECLARE_ALIGNED(16, char, whiteclamp[16]); DECLARE_ALIGNED(16, char, bothclamp[16]); diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c index 1f5d79068..7046a63e8 100644 --- a/vp8/common/ppc/systemdependent.c +++ b/vp8/common/ppc/systemdependent.c @@ -9,7 +9,6 @@ */ -#include "g_common.h" #include "subpixel.h" #include "loopfilter.h" #include "recon.h" diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h index 65b0cab6a..665e21fd9 100644 --- a/vp8/common/ppflags.h +++ b/vp8/common/ppflags.h @@ -23,7 +23,8 @@ enum VP8D_DEBUG_TXT_RATE_INFO = 1<<6, VP8D_DEBUG_DRAW_MV = 1<<7, VP8D_DEBUG_CLR_BLK_MODES = 1<<8, - VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9 + VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9, + VP8D_MFQE = 1<<10 }; typedef struct diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c index 49cebd6f5..8ff483708 100644 --- a/vp8/common/x86/idct_blk_mmx.c +++ b/vp8/common/x86/idct_blk_mmx.c @@ -14,12 +14,12 @@ extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); -void vp8_dequantize_b_mmx(BLOCKD *d) +void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) { short *sq = (short *) d->qcoeff; short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp8_dequantize_b_impl_mmx(sq, dq, q); + + vp8_dequantize_b_impl_mmx(sq, dq, DQC); } void vp8_dequant_idct_add_y_block_mmx diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 86927d9f1..2ad010adb 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -1385,52 +1385,54 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx - push rsi - push rdi ; end prolog - mov rsi, arg(0) ;src_ptr + mov rcx, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - mov rdx, arg(2) ;blimit - movdqa xmm3, XMMWORD PTR [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax + lea rdx, [rcx + rax] neg rax ; calculate mask - movdqa xmm1, [rsi+2*rax] ; p1 - movdqa xmm0, [rdi] ; q1 + movdqa xmm0, [rdx] ; q1 + mov rdx, arg(2) ;blimit + movdqa xmm1, [rcx+2*rax] ; p1 + movdqa xmm2, xmm1 movdqa xmm7, xmm0 - movdqa xmm4, xmm0 + psubusb xmm0, xmm1 ; q1-=p1 - psubusb xmm1, xmm4 ; p1-=q1 + psubusb xmm1, xmm7 ; p1-=q1 por xmm1, xmm0 ; abs(p1-q1) pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm1, 1 ; abs(p1-q1)/2 - movdqa xmm5, [rsi+rax] ; p0 - movdqa xmm4, [rsi] ; q0 + movdqa xmm3, XMMWORD PTR [rdx] + + movdqa xmm5, [rcx+rax] ; p0 + movdqa xmm4, [rcx] ; q0 movdqa xmm0, xmm4 ; q0 movdqa xmm6, xmm5 ; p0 psubusb xmm5, xmm4 ; p0-=q0 psubusb xmm4, xmm6 ; q0-=p0 por xmm5, xmm4 ; abs(p0 - q0) + + movdqa xmm4, [GLOBAL(t80)] + paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm3, xmm3 pcmpeqb xmm5, xmm3 + ; start work on filters - pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + pxor xmm2, xmm4 ; p1 offset to convert to signed values + pxor xmm7, xmm4 ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm6, xmm4 ; offset to convert to signed values + pxor xmm0, xmm4 ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1438,42 +1440,36 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) pand xmm5, xmm2 ; mask filter values we don't care about - ; do + 4 side - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - movdqa xmm1, xmm5 ; get a copy of filters - psraw xmm1, 11 ; arithmetic shift right 11 - psllw xmm1, 8 ; shift left 8 to put it back - - por xmm0, xmm1 ; put the two together to get result + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 + movdqa xmm0, xmm5 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [GLOBAL(t80)] ; unoffset - movdqa [rsi], xmm3 ; write back + movdqa xmm1, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] - ; now do +3 side - psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm6, xmm5 ; p0+= p0 add + pxor xmm3, xmm4 ; unoffset + movdqa [rcx], xmm3 ; write back - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset - movdqa [rsi+rax], xmm6 ; write back + pxor xmm6, xmm4 ; unoffset + movdqa [rcx+rax], xmm6 ; write back ; begin epilog - pop rdi - pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS @@ -1536,9 +1532,6 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - movdqa t0, xmm0 ; save to t0 - movdqa t1, xmm2 ; save to t1 - lea rsi, [rsi + rax*8] lea rdi, [rsi + rax] lea rdx, [rsi + rax*4] @@ -1551,26 +1544,24 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 - movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0 + movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 - movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0 + movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 - punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 - punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 + punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 + punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 - punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 + punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 - movdqa xmm1, xmm4 - punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 + movdqa xmm7, xmm4 + punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 + punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 movdqa xmm6, xmm4 - punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 + punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 + punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 movdqa xmm1, xmm0 movdqa xmm3, xmm2 @@ -1579,6 +1570,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + mov rdx, arg(2) ;blimit + ; calculate mask movdqa xmm6, xmm0 ; p1 movdqa xmm7, xmm3 ; q1 @@ -1588,6 +1581,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm6, 1 ; abs(p1-q1)/2 + movdqa xmm7, [rdx] + movdqa xmm5, xmm1 ; p0 movdqa xmm4, xmm2 ; q0 psubusb xmm5, xmm2 ; p0-=q0 @@ -1596,8 +1591,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;blimit - movdqa xmm7, XMMWORD PTR [rdx] + movdqa xmm4, [GLOBAL(t80)] psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm7, xmm7 @@ -1607,59 +1601,48 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): movdqa t0, xmm0 movdqa t1, xmm3 - pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - + pxor xmm0, xmm4 ; p1 offset to convert to signed values + pxor xmm3, xmm4 ; q1 offset to convert to signed values psubsb xmm0, xmm3 ; p1 - q1 - movdqa xmm6, xmm1 ; p0 - - movdqa xmm7, xmm2 ; q0 - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values - movdqa xmm3, xmm7 ; offseted ; q0 - - psubsb xmm7, xmm6 ; q0 - p0 - paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0) + movdqa xmm6, xmm1 ; p0 +; movdqa xmm7, xmm2 ; q0 - paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0) + pxor xmm6, xmm4 ; offset to convert to signed values + pxor xmm2, xmm4 ; offset to convert to signed values + movdqa xmm3, xmm2 ; offseted ; q0 + psubsb xmm2, xmm6 ; q0 - p0 + paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0) pand xmm5, xmm0 ; mask filter values we don't care about - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - - movdqa xmm7, xmm5 ; get a copy of filters - psraw xmm7, 11 ; arithmetic shift right 11 - - psllw xmm7, 8 ; shift left 8 to put it back - por xmm0, xmm7 ; put the two together to get result - - psubsb xmm3, xmm0 ; q0-= q0sz add - pxor xmm3, [GLOBAL(t80)] ; unoffset q0 - - ; now do +3 side + movdqa xmm0, xmm5 psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 + movdqa xmm1, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm6, xmm5 ; p0+= p0 add - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset p0 + pxor xmm3, xmm4 ; unoffset q0 + pxor xmm6, xmm4 ; unoffset p0 movdqa xmm0, t0 ; p1 movdqa xmm4, t1 ; q1 @@ -1763,3 +1746,9 @@ s9: align 16 s63: times 8 dw 0x003f +align 16 +te0: + times 16 db 0xe0 +align 16 +t1f: + times 16 db 0x1f diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index a82c1b4fd..4b68ef5f2 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -559,12 +559,492 @@ sym(vp8_intra_pred_uv_ho_%1): vp8_intra_pred_uv_ho mmx2 vp8_intra_pred_uv_ho ssse3 +;void vp8_intra_pred_y_dc_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dc_sse2) +sym(vp8_intra_pred_y_dc_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; from top + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rsi, rax + pxor xmm0, xmm0 + movdqa xmm1, [rsi] + psadbw xmm1, xmm0 + movq xmm2, xmm1 + punpckhqdq xmm1, xmm1 + paddw xmm1, xmm2 + + ; from left + dec rsi + lea rdi, [rax*3] + movzx ecx, byte [rsi+rax] + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + movzx edx, byte [rsi+rax*4] + add ecx, edx + + ; add up + pextrw edx, xmm1, 0x0 + lea edx, [edx+ecx+16] + sar edx, 5 + movd xmm1, edx + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + dec rsi + jnz .label + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dctop_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dctop_sse2) +sym(vp8_intra_pred_y_dctop_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + GET_GOT rbx + ; end prolog + + ; from top + mov rcx, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rcx, rax + pxor xmm0, xmm0 + movdqa xmm1, [rcx] + psadbw xmm1, xmm0 + movdqa xmm2, xmm1 + punpckhqdq xmm1, xmm1 + paddw xmm1, xmm2 + + ; add up + paddw xmm1, [GLOBAL(dc_8)] + psraw xmm1, 4 + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdx, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdx ], xmm1 + movdqa [rdx+rcx ], xmm1 + movdqa [rdx+rcx*2], xmm1 + movdqa [rdx+rax ], xmm1 + lea rdx, [rdx+rcx*4] + movdqa [rdx ], xmm1 + movdqa [rdx+rcx ], xmm1 + movdqa [rdx+rcx*2], xmm1 + movdqa [rdx+rax ], xmm1 + lea rdx, [rdx+rcx*4] + dec rsi + jnz .label + + ; begin epilog + RESTORE_GOT + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dcleft_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dcleft_sse2) +sym(vp8_intra_pred_y_dcleft_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; from left + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + dec rsi + lea rdi, [rax*3] + movzx ecx, byte [rsi] + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + lea edx, [ecx+edx+8] + + ; add up + shr edx, 4 + movd xmm1, edx + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + dec rsi + jnz .label + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dc128_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dc128_sse2) +sym(vp8_intra_pred_y_dc128_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + GET_GOT rbx + ; end prolog + + ; write out + mov rsi, 2 + movdqa xmm1, [GLOBAL(dc_128)] + mov rax, arg(0) ;dst; + movsxd rdx, dword ptr arg(1) ;dst_stride + lea rcx, [rdx*3] + +.label + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + dec rsi + jnz .label + + ; begin epilog + RESTORE_GOT + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_tm_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +%macro vp8_intra_pred_y_tm 1 +global sym(vp8_intra_pred_y_tm_%1) +sym(vp8_intra_pred_y_tm_%1): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + GET_GOT rbx + ; end prolog + + ; read top row + mov edx, 8 + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rsi, rax + pxor xmm0, xmm0 +%ifidn %1, ssse3 + movdqa xmm3, [GLOBAL(dc_1024)] +%endif + movdqa xmm1, [rsi] + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm0 + punpckhbw xmm2, xmm0 + + ; set up left ptrs ans subtract topleft + movd xmm4, [rsi-1] + lea rsi, [rsi+rax-1] +%ifidn %1, sse2 + punpcklbw xmm4, xmm0 + pshuflw xmm4, xmm4, 0x0 + punpcklqdq xmm4, xmm4 +%else + pshufb xmm4, xmm3 +%endif + psubw xmm1, xmm4 + psubw xmm2, xmm4 + + ; set up dest ptrs + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride +vp8_intra_pred_y_tm_%1_loop: + movd xmm4, [rsi] + movd xmm5, [rsi+rax] +%ifidn %1, sse2 + punpcklbw xmm4, xmm0 + punpcklbw xmm5, xmm0 + pshuflw xmm4, xmm4, 0x0 + pshuflw xmm5, xmm5, 0x0 + punpcklqdq xmm4, xmm4 + punpcklqdq xmm5, xmm5 +%else + pshufb xmm4, xmm3 + pshufb xmm5, xmm3 +%endif + movdqa xmm6, xmm4 + movdqa xmm7, xmm5 + paddw xmm4, xmm1 + paddw xmm6, xmm2 + paddw xmm5, xmm1 + paddw xmm7, xmm2 + packuswb xmm4, xmm6 + packuswb xmm5, xmm7 + movdqa [rdi ], xmm4 + movdqa [rdi+rcx], xmm5 + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rcx*2] + dec edx + jnz vp8_intra_pred_y_tm_%1_loop + + ; begin epilog + RESTORE_GOT + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret +%endmacro + +vp8_intra_pred_y_tm sse2 +vp8_intra_pred_y_tm ssse3 + +;void vp8_intra_pred_y_ve_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_ve_sse2) +sym(vp8_intra_pred_y_ve_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + ; end prolog + + ; read from top + mov rax, arg(2) ;src; + movsxd rdx, dword ptr arg(3) ;src_stride; + sub rax, rdx + movdqa xmm1, [rax] + + ; write out + mov rsi, 2 + mov rax, arg(0) ;dst; + movsxd rdx, dword ptr arg(1) ;dst_stride + lea rcx, [rdx*3] + +.label + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + dec rsi + jnz .label + + ; begin epilog + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_ho_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_ho_sse2) +sym(vp8_intra_pred_y_ho_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; read from left and write out + mov edx, 8 + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + dec rsi + +vp8_intra_pred_y_ho_sse2_loop: + movd xmm0, [rsi] + movd xmm1, [rsi+rax] + ; FIXME use pshufb for ssse3 version + punpcklbw xmm0, xmm0 + punpcklbw xmm1, xmm1 + pshuflw xmm0, xmm0, 0x0 + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + movdqa [rdi ], xmm0 + movdqa [rdi+rcx], xmm1 + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rcx*2] + dec edx + jnz vp8_intra_pred_y_ho_sse2_loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + SECTION_RODATA +align 16 dc_128: - times 8 db 128 + times 16 db 128 dc_4: times 4 dw 4 align 16 +dc_8: + times 8 dw 8 +align 16 dc_1024: times 8 dw 0x400 align 16 diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c index fcc75a901..44221cd0b 100644 --- a/vp8/common/x86/recon_wrapper_sse2.c +++ b/vp8/common/x86/recon_wrapper_sse2.c @@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x) vp8_intra_pred_uv_tm_ssse3, vp8_intra_pred_uv_ho_ssse3); } + +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3); + +static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_stride, + build_intra_predictors_mbuv_fn_t tm_func) +{ + int mode = x->mode_info_context->mbmi.mode; + build_intra_predictors_mbuv_fn_t fn; + int src_stride = x->dst.y_stride; + switch (mode) { + case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break; + case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break; + case TM_PRED: fn = tm_func; break; + case DC_PRED: + if (x->up_available) { + if (x->left_available) { + fn = vp8_intra_pred_y_dc_sse2; break; + } else { + fn = vp8_intra_pred_y_dctop_sse2; break; + } + } else if (x->left_available) { + fn = vp8_intra_pred_y_dcleft_sse2; break; + } else { + fn = vp8_intra_pred_y_dc128_sse2; break; + } + break; + default: return; + } + + fn(dst_y, dst_stride, x->dst.y_buffer, src_stride); + return; +} + +void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_ssse3); +} + +void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_ssse3); + +} diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h index fbb3dcb63..afacc60d1 100644 --- a/vp8/common/x86/recon_x86.h +++ b/vp8/common/x86/recon_x86.h @@ -42,6 +42,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx); extern prototype_copy_block(vp8_copy_mem16x16_sse2); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_sse2); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_sse2); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_copy16x16 @@ -53,12 +55,20 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2); #undef vp8_recon_build_intra_predictors_mbuv_s #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2 +#undef vp8_recon_build_intra_predictors_mby +#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_sse2 + +#undef vp8_recon_build_intra_predictors_mby_s +#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_sse2 + #endif #endif #if HAVE_SSSE3 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_ssse3); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_ssse3); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_build_intra_predictors_mbuv @@ -67,6 +77,12 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3) #undef vp8_recon_build_intra_predictors_mbuv_s #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3 +#undef vp8_recon_build_intra_predictors_mby +#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_ssse3 + +#undef vp8_recon_build_intra_predictors_mby_s +#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3 + #endif #endif #endif diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c index ad3a1f76b..e1e1b7987 100644 --- a/vp8/common/x86/x86_systemdependent.c +++ b/vp8/common/x86/x86_systemdependent.c @@ -11,7 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/x86.h" -#include "vp8/common/g_common.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" #include "vp8/common/recon.h" @@ -86,6 +85,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) vp8_build_intra_predictors_mbuv_sse2; rtcd->recon.build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; + rtcd->recon.build_intra_predictors_mby = + vp8_build_intra_predictors_mby_sse2; + rtcd->recon.build_intra_predictors_mby_s = + vp8_build_intra_predictors_mby_s_sse2; rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; @@ -132,6 +135,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) vp8_build_intra_predictors_mbuv_ssse3; rtcd->recon.build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; + rtcd->recon.build_intra_predictors_mby = + vp8_build_intra_predictors_mby_ssse3; + rtcd->recon.build_intra_predictors_mby_s = + vp8_build_intra_predictors_mby_s_ssse3; } #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 11d0e38f5..917aeceb6 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -42,7 +42,6 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi) { - int i; int Q; VP8_COMMON *const pc = & pbi->common; @@ -52,15 +51,9 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi) pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); - /* all the ac values = ; */ - for (i = 1; i < 16; i++) - { - int rc = vp8_default_zig_zag1d[i]; - - pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q); - pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); - } + pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); + pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); + pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); } } @@ -88,19 +81,19 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) else QIndex = pc->base_qindex; - /* Set up the block level dequant pointers */ - for (i = 0; i < 16; i++) - { - xd->block[i].dequant = pc->Y1dequant[QIndex]; - } + /* Set up the macroblock dequant constants */ + xd->dequant_y1_dc[0] = 1; + xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; + xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; + xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; - for (i = 16; i < 24; i++) + for (i = 1; i < 16; i++) { - xd->block[i].dequant = pc->UVdequant[QIndex]; + xd->dequant_y1_dc[i] = + xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; + xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; + xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; } - - xd->block[24].dequant = pc->Y2dequant[QIndex]; - } #if CONFIG_RUNTIME_CPU_DETECT @@ -180,6 +173,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else { + short *DQC = xd->dequant_y1; + /* clear out residual eob info */ if(xd->mode_info_context->mbmi.mb_skip_coeff) vpx_memset(xd->eobs, 0, 25); @@ -200,13 +195,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, if (xd->eobs[i] > 1) { DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) - (b->qcoeff, b->dequant, + (b->qcoeff, DQC, *(b->base_dst) + b->dst, b->dst_stride); } else { IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) - (b->qcoeff[0] * b->dequant[0], + (b->qcoeff[0] * DQC[0], *(b->base_dst) + b->dst, b->dst_stride, *(b->base_dst) + b->dst, b->dst_stride); ((int *)b->qcoeff)[0] = 0; @@ -233,10 +228,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* dequantization and idct */ if (mode != B_PRED) { - short *DQC = xd->block[0].dequant; - - /* save the dc dequant constant in case it is overridden */ - short dc_dequant_temp = DQC[0]; + short *DQC = xd->dequant_y1; if (mode != SPLITMV) { @@ -245,7 +237,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, + xd->dequant_y2); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -260,7 +253,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else { - b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0]; + b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff); ((int *)b->qcoeff)[0] = 0; @@ -269,20 +262,17 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* override the dc dequant constant in order to preserve the * dc components */ - DQC[0] = 1; + DQC = xd->dequant_y1_dc; } DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, + (xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - - /* restore the dc dequant constant */ - DQC[0] = dc_dequant_temp; } DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, + (xd->qcoeff+16*16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); } diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 9a45702cf..80648d39f 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -20,7 +20,6 @@ #include "vpx_scale/yv12extend.h" #include "vp8/common/loopfilter.h" #include "vp8/common/swapyv12buffer.h" -#include "vp8/common/g_common.h" #include "vp8/common/threading.h" #include "decoderthreading.h" #include <stdio.h> diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 947b3a1c6..2ce00f705 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -37,7 +37,7 @@ extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) { VP8_COMMON *const pc = & pbi->common; - int i, j; + int i; for (i = 0; i < count; i++) { @@ -77,10 +77,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D mbd->current_bc = &pbi->bc2; - for (j = 0; j < 25; j++) - { - mbd->block[j].dequant = xd->block[j].dequant; - } + vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); mbd->fullpixel_mask = 0xffffffff; if(pc->full_pixel) @@ -177,6 +177,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m /* dequantization and idct */ if (xd->mode_info_context->mbmi.mode == B_PRED) { + short *DQC = xd->dequant_y1; + for (i = 0; i < 16; i++) { BLOCKD *b = &xd->block[i]; @@ -190,13 +192,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m if (xd->eobs[i] > 1) { DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) - (b->qcoeff, b->dequant, + (b->qcoeff, DQC, *(b->base_dst) + b->dst, b->dst_stride); } else { IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) - (b->qcoeff[0] * b->dequant[0], + (b->qcoeff[0] * DQC[0], *(b->base_dst) + b->dst, b->dst_stride, *(b->base_dst) + b->dst, b->dst_stride); ((int *)b->qcoeff)[0] = 0; @@ -206,9 +208,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m } else { - short *DQC = xd->block[0].dequant; - - DECLARE_ALIGNED(16, short, local_dequant[16]); + short *DQC = xd->dequant_y1; if (xd->mode_info_context->mbmi.mode != SPLITMV) { @@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, xd->dequant_y2); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -232,20 +232,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m } else { - b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0]; + b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff); ((int *)b->qcoeff)[0] = 0; } - /* make a local copy of the dequant constants */ - vpx_memcpy(local_dequant, xd->block[0].dequant, - sizeof(local_dequant)); - /* override the dc dequant constant */ - local_dequant[0] = 1; - - /* use the new dequant constants */ - DQC = local_dequant; + DQC = xd->dequant_y1_dc; } DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) @@ -255,7 +248,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m } DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, + (xd->qcoeff+16*16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); } diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 5e5a60db7..0a74ca46d 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -45,10 +45,6 @@ typedef struct unsigned char **base_src; int src; int src_stride; - -// MV enc_mv; - int force_empty; - } BLOCK; typedef struct @@ -107,7 +103,6 @@ typedef struct int mv_row_min; int mv_row_max; - int vector_range; // Used to monitor limiting range of recent vectors to guide search. int skip; int encode_breakout; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 88868d684..b5c5c7445 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -595,8 +595,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) // Activity map pointer x->mb_activity_ptr = cpi->mb_activity_map; - x->vector_range = 32; - x->act_zbin_adj = 0; x->partition_info = x->pi; @@ -1122,7 +1120,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd)); DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, + (xd->qcoeff+16*16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); return rate; @@ -1307,7 +1305,7 @@ int vp8cx_encode_inter_macroblock vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd)); DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, + (xd->qcoeff+16*16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 4378b634e..16393a1ff 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -18,7 +18,6 @@ #include "vp8/common/invtrans.h" #include "vp8/common/recon.h" #include "dct.h" -#include "vp8/common/g_common.h" #include "encodeintra.h" diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index ef8ead588..24339a5e0 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -302,7 +302,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->mv_col_max = x->mv_col_max; z->mv_row_min = x->mv_row_min; z->mv_row_max = x->mv_row_max; - z->vector_range = x->vector_range ; */ z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4; @@ -350,8 +349,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->block[i].src = x->block[i].src; */ z->block[i].src_stride = x->block[i].src_stride; - z->block[i].force_empty = x->block[i].force_empty; - } { @@ -387,10 +384,22 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - for (i = 0; i < 25; i++) - { - zd->block[i].dequant = xd->block[i].dequant; - } + vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); + +#if 1 + /*TODO: Remove dequant from BLOCKD. This is a temporary solution until + * the quantizer code uses a passed in pointer to the dequant constants. + * This will also require modifications to the x86 and neon assembly. + * */ + for (i = 0; i < 16; i++) + zd->block[i].dequant = zd->dequant_y1; + for (i = 16; i < 24; i++) + zd->block[i].dequant = zd->dequant_uv; + zd->block[24].dequant = zd->dequant_y2; +#endif } } @@ -421,8 +430,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, #endif mb->gf_active_ptr = x->gf_active_ptr; - mb->vector_range = 32; - vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); mbr_ei[i].totalrate = 0; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 6e0254644..9223781af 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -23,7 +23,6 @@ #include "ratectrl.h" #include "vp8/common/quant_common.h" #include "segmentation.h" -#include "vp8/common/g_common.h" #include "vpx_scale/yv12extend.h" #if CONFIG_POSTPROC #include "vp8/common/postproc.h" @@ -251,6 +250,9 @@ static void save_layer_context(VP8_COMP *cpi) lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->starting_buffer_level_in_ms = cpi->oxcf.starting_buffer_level_in_ms; + lc->optimal_buffer_level_in_ms = cpi->oxcf.optimal_buffer_level_in_ms; + lc->maximum_buffer_size_in_ms = cpi->oxcf.maximum_buffer_size_in_ms; lc->buffer_level = cpi->buffer_level; lc->bits_off_target = cpi->bits_off_target; lc->total_actual_bits = cpi->total_actual_bits; @@ -288,6 +290,9 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->oxcf.starting_buffer_level_in_ms = lc->starting_buffer_level_in_ms; + cpi->oxcf.optimal_buffer_level_in_ms = lc->optimal_buffer_level_in_ms; + cpi->oxcf.maximum_buffer_size_in_ms = lc->maximum_buffer_size_in_ms; cpi->buffer_level = lc->buffer_level; cpi->bits_off_target = lc->bits_off_target; cpi->total_actual_bits = lc->total_actual_bits; @@ -1255,6 +1260,8 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (cpi->frame_rate > 180) cpi->frame_rate = 30; + cpi->ref_frame_rate = cpi->frame_rate; + // change includes all joint functionality vp8_change_config(cpi, oxcf); @@ -1290,6 +1297,10 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; + lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level; + lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level; + lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; + lc->starting_buffer_level = rescale(oxcf->starting_buffer_level, lc->target_bandwidth, 1000); @@ -1346,6 +1357,56 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) #endif } +void update_layer_contexts (VP8_COMP *cpi) +{ + VP8_CONFIG *oxcf = &cpi->oxcf; + + /* Update snapshots of the layer contexts to reflect new parameters */ + if (oxcf->number_of_layers > 1) + { + unsigned int i; + double prev_layer_frame_rate=0; + + for (i=0; i<oxcf->number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + + lc->frame_rate = + cpi->ref_frame_rate / oxcf->rate_decimator[i]; + lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; + + lc->starting_buffer_level = rescale( + oxcf->starting_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = rescale( + oxcf->optimal_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = rescale( + oxcf->maximum_buffer_size_in_ms, + lc->target_bandwidth, 1000); + + // Work out the average size of a frame within this layer + if (i > 0) + lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - + oxcf->target_bitrate[i-1]) * 1000 / + (lc->frame_rate - prev_layer_frame_rate); + + lc->active_worst_quality = oxcf->worst_allowed_q; + lc->active_best_quality = oxcf->best_allowed_q; + lc->avg_frame_qindex = oxcf->worst_allowed_q; + + prev_layer_frame_rate = lc->frame_rate; + } + } +} void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { @@ -1486,9 +1547,12 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) // local file playback mode == really big buffer if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { - cpi->oxcf.starting_buffer_level = 60000; - cpi->oxcf.optimal_buffer_level = 60000; - cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level = 60000; + cpi->oxcf.optimal_buffer_level = 60000; + cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level_in_ms = 60000; + cpi->oxcf.optimal_buffer_level_in_ms = 60000; + cpi->oxcf.maximum_buffer_size_in_ms = 240000; } // Convert target bandwidth from Kbit/s to Bit/s @@ -4257,14 +4321,15 @@ static void encode_frame_to_data_rate vp8_clear_system_state(); //__asm emms; - if (cpi->twopass.total_left_stats->coded_error != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" + if (cpi->twopass.total_left_stats.coded_error != 0.0) + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f" "%10.3f %8d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, + cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, @@ -4275,18 +4340,19 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, (double)cpi->twopass.bits_left / - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); else - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f" "%8d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, + cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, @@ -4297,7 +4363,7 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); fclose(f); @@ -4670,13 +4736,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l return -1; } - // Restore layer specific context if necessary - if (cpi->oxcf.number_of_layers > 1) - { - restore_layer_context (cpi, - cpi->oxcf.layer_id[cm->current_video_frame % cpi->oxcf.periodicity]); - } - if (cpi->source->ts_start < cpi->first_time_stamp_ever) { cpi->first_time_stamp_ever = cpi->source->ts_start; @@ -4684,16 +4743,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l } // adjust frame rates based on timestamps given - if (cpi->oxcf.number_of_layers > 1 ) - { - vp8_new_frame_rate ( - cpi, cpi->layer_context[cpi->current_layer].frame_rate); - - cpi->last_time_stamp_seen = cpi->source->ts_start; - cpi->last_end_time_stamp_seen = cpi->source->ts_end; - - } - else if (!cm->refresh_alt_ref_frame) + if (!cm->refresh_alt_ref_frame || (cpi->oxcf.number_of_layers > 1)) { int64_t this_duration; int step = 0; @@ -4718,7 +4768,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (this_duration) { if (step) - vp8_new_frame_rate(cpi, 10000000.0 / this_duration); + cpi->ref_frame_rate = 10000000.0 / this_duration; else { double avg_duration, interval; @@ -4731,18 +4781,46 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if(interval > 10000000.0) interval = 10000000; - avg_duration = 10000000.0 / cpi->frame_rate; + avg_duration = 10000000.0 / cpi->ref_frame_rate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; - vp8_new_frame_rate(cpi, 10000000.0 / avg_duration); + cpi->ref_frame_rate = 10000000.0 / avg_duration; + } + + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + // Update frame rates for each layer + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + lc->frame_rate = cpi->ref_frame_rate / + cpi->oxcf.rate_decimator[i]; + } } + else + vp8_new_frame_rate(cpi, cpi->ref_frame_rate); } cpi->last_time_stamp_seen = cpi->source->ts_start; cpi->last_end_time_stamp_seen = cpi->source->ts_end; } + if (cpi->oxcf.number_of_layers > 1) + { + int layer; + + update_layer_contexts (cpi); + + // Restore layer specific context & set frame rate + layer = cpi->oxcf.layer_id[ + cm->current_video_frame % cpi->oxcf.periodicity]; + restore_layer_context (cpi, layer); + vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate); + } + if (cpi->compressor_speed == 2) { if (cpi->oxcf.number_of_layers == 1) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 46951e3b9..35efd3a00 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -253,6 +253,9 @@ typedef struct int starting_buffer_level; int optimal_buffer_level; int maximum_buffer_size; + int starting_buffer_level_in_ms; + int optimal_buffer_level_in_ms; + int maximum_buffer_size_in_ms; int avg_frame_size_for_layer; @@ -421,6 +424,7 @@ typedef struct VP8_COMP int buffered_mode; double frame_rate; + double ref_frame_rate; int64_t buffer_level; int bits_off_target; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 592da9dbb..405c72dbd 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -21,7 +21,6 @@ #include "vp8/common/reconinter.h" #include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" -#include "vp8/common/g_common.h" #include "variance.h" #include "mcomp.h" #include "rdopt.h" diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 3ca8758ef..d2aa7fe72 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -436,7 +436,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) int quant_val; int Q; - int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44}; + int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, + 44, 44}; for (Q = 0; Q < QINDEX_RANGE; Q++) { @@ -469,36 +470,58 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; // all the ac values = ; - for (i = 1; i < 16; i++) + quant_val = vp8_ac_yquant(Q); + cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; + invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, + cpi->Y1quant_shift[Q] + 1, quant_val); + cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.Y1dequant[Q][1] = quant_val; + cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7; + + quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); + cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val; + invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1, + cpi->Y2quant_shift[Q] + 1, quant_val); + cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; + cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7; + cpi->common.Y2dequant[Q][1] = quant_val; + cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7; + + quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); + cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val; + invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1, + cpi->UVquant_shift[Q] + 1, quant_val); + cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.UVdequant[Q][1] = quant_val; + cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7; + + for (i = 2; i < 16; i++) { - int rc = vp8_default_zig_zag1d[i]; - - quant_val = vp8_ac_yquant(Q); - cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc, - cpi->Y1quant_shift[Q] + rc, quant_val); - cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.Y1dequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7; - - quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); - cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc, - cpi->Y2quant_shift[Q] + rc, quant_val); - cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; - cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7; - cpi->common.Y2dequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7; - - quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); - cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val; - invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc, - cpi->UVquant_shift[Q] + rc, quant_val); - cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.UVdequant[Q][rc] = quant_val; - cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7; + cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1]; + cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1]; + cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1]; + cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1]; + cpi->Y1round[Q][i] = cpi->Y1round[Q][1]; + cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] * + zbin_boost[i]) >> 7; + + cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1]; + cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1]; + cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1]; + cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1]; + cpi->Y2round[Q][i] = cpi->Y2round[Q][1]; + cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] * + zbin_boost[i]) >> 7; + + cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1]; + cpi->UVquant[Q][i] = cpi->UVquant[Q][1]; + cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1]; + cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1]; + cpi->UVround[Q][i] = cpi->UVround[Q][1]; + cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] * + zbin_boost[i]) >> 7; } } } @@ -615,6 +638,31 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) */ if (!ok_to_skip || QIndex != x->q_index) { + + xd->dequant_y1_dc[0] = 1; + xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0]; + xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0]; + xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0]; + + for (i = 1; i < 16; i++) + { + xd->dequant_y1_dc[i] = + xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1]; + xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1]; + xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1]; + } +#if 1 + /*TODO: Remove dequant from BLOCKD. This is a temporary solution until + * the quantizer code uses a passed in pointer to the dequant constants. + * This will also require modifications to the x86 and neon assembly. + * */ + for (i = 0; i < 16; i++) + x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex]; + for (i = 16; i < 24; i++) + x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex]; + x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex]; +#endif + // Y zbin_extra = ZBIN_EXTRA_Y; @@ -625,7 +673,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].round = cpi->Y1round[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; x->block[i].zbin_extra = (short)zbin_extra; } @@ -640,7 +687,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].round = cpi->UVround[QIndex]; - x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex]; x->block[i].zbin_extra = (short)zbin_extra; } @@ -653,7 +699,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; x->block[24].round = cpi->Y2round[QIndex]; - x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; x->block[24].zbin_extra = (short)zbin_extra; @@ -663,6 +708,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) cpi->last_zbin_over_quant = cpi->zbin_over_quant; cpi->last_zbin_mode_boost = cpi->zbin_mode_boost; x->last_act_zbin_adj = x->act_zbin_adj; + + + } else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 726d3c4eb..d29aa75fe 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -28,7 +28,6 @@ #include "encodemb.h" #include "quantize.h" #include "vp8/common/idct.h" -#include "vp8/common/g_common.h" #include "variance.h" #include "mcomp.h" #include "rdopt.h" diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index a773d4391..545e4f205 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -22,7 +22,6 @@ #include "ratectrl.h" #include "vp8/common/quant_common.h" #include "segmentation.h" -#include "vp8/common/g_common.h" #include "vpx_scale/yv12extend.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/swapyv12buffer.h" diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index 34cc9c3bb..e698e904c 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -80,6 +80,9 @@ sym(vp8_fast_quantize_b_ssse3): mov rdi, [rsi + vp8_blockd_dequant] mov rcx, [rsi + vp8_blockd_dqcoeff] + movdqa xmm2, xmm1 ;store y for getting eob + movdqa xmm3, xmm5 + pxor xmm1, xmm0 pxor xmm5, xmm4 psubw xmm1, xmm0 @@ -88,35 +91,30 @@ sym(vp8_fast_quantize_b_ssse3): movdqa [rax], xmm1 movdqa [rax + 16], xmm5 - movdqa xmm2, [rdi] - movdqa xmm3, [rdi + 16] - - pxor xmm4, xmm4 - pmullw xmm2, xmm1 - pmullw xmm3, xmm5 - - pcmpeqw xmm1, xmm4 ;non zero mask - pcmpeqw xmm5, xmm4 ;non zero mask - packsswb xmm1, xmm5 - pshufb xmm1, [GLOBAL(zz_shuf)] + movdqa xmm0, [rdi] + movdqa xmm4, [rdi + 16] - pmovmskb edx, xmm1 + pmullw xmm0, xmm1 + pmullw xmm4, xmm5 + pxor xmm1, xmm1 - xor rdi, rdi - mov eax, -1 - xor dx, ax ;flip the bits for bsr - bsr eax, edx + pcmpgtw xmm2, xmm1 ;calculate eob + pcmpgtw xmm3, xmm1 + packsswb xmm2, xmm3 + pshufb xmm2, [GLOBAL(zz_shuf)] - movdqa [rcx], xmm2 ;store dqcoeff - movdqa [rcx + 16], xmm3 ;store dqcoeff + pmovmskb edx, xmm2 + movdqa [rcx], xmm0 ;store dqcoeff + movdqa [rcx + 16], xmm4 ;store dqcoeff mov rcx, [rsi + vp8_blockd_eob] - sub edi, edx ;check for all zeros in bit mask - sar edi, 31 ;0 or -1 + bsr eax, edx ;count 0 add eax, 1 - and eax, edi ;if the bit mask was all zero, - ;then eob = 0 + + cmp edx, 0 ;if all 0, eob=0 + cmove eax, edx + mov BYTE PTR [rcx], al ;store eob ; begin epilog diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index eb483d235..0e564320f 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -39,7 +39,6 @@ VP8_COMMON_SRCS-yes += common/entropymode.h VP8_COMMON_SRCS-yes += common/entropymv.h VP8_COMMON_SRCS-yes += common/extend.h VP8_COMMON_SRCS-yes += common/findnearmv.h -VP8_COMMON_SRCS-yes += common/g_common.h VP8_COMMON_SRCS-yes += common/header.h VP8_COMMON_SRCS-yes += common/idct.h VP8_COMMON_SRCS-yes += common/invtrans.h diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 6181ee8ee..f2f376a7c 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -335,6 +335,10 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->under_shoot_pct = cfg.rc_undershoot_pct; oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->maximum_buffer_size_in_ms = cfg.rc_buf_sz; + oxcf->starting_buffer_level_in_ms = cfg.rc_buf_initial_sz; + oxcf->optimal_buffer_level_in_ms = cfg.rc_buf_optimal_sz; + oxcf->maximum_buffer_size = cfg.rc_buf_sz; oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; @@ -1237,7 +1241,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = /* keyframing settings (kf) */ VPX_KF_AUTO, /* g_kfmode*/ 0, /* kf_min_dist */ - 9999, /* kf_max_dist */ + 128, /* kf_max_dist */ #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) 1, /* g_delete_first_pass_file */ diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index fbe58171c..de2714317 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -412,7 +412,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { ctx->postproc_cfg.post_proc_flag = - VP8_DEBLOCK | VP8_DEMACROBLOCK; + VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE; ctx->postproc_cfg.deblocking_level = 4; ctx->postproc_cfg.noise_level = 0; } @@ -700,6 +700,27 @@ static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } +extern int vp8_references_buffer( VP8_COMMON *oci, int ref_frame ); +static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, + int ctrl_id, + va_list args) +{ + int *ref_info = va_arg(args, int *); + VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; + VP8_COMMON *oci = &pbi->common; + + if (ref_info) + { + *ref_info = + (vp8_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) | + (vp8_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) | + (vp8_references_buffer( oci, LAST_FRAME )?VP8_LAST_FRAME:0); + + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; +} static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, int ctrl_id, diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c index 41ecaa78b..65883ff80 100644 --- a/vp8_scalable_patterns.c +++ b/vp8_scalable_patterns.c @@ -129,27 +129,29 @@ int main(int argc, char **argv) { int got_data; int flags = 0; int i; + int pts = 0; // PTS starts at 0 + int frame_duration = 1; // 1 timebase tick per frame int layering_mode = 0; int frames_in_layer[MAX_LAYERS] = {0}; int layer_flags[MAX_PERIODICITY] = {0}; // Check usage and arguments - if (argc < 7) - die("Usage: %s <infile> <outfile> <width> <height> <mode> " - "<Rate_0> ... <Rate_nlayers-1>\n", argv[0]); + if (argc < 9) + die("Usage: %s <infile> <outfile> <width> <height> <rate_num> " + " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]); width = strtol (argv[3], NULL, 0); height = strtol (argv[4], NULL, 0); if (width < 16 || width%2 || height <16 || height%2) die ("Invalid resolution: %d x %d", width, height); - if (!sscanf(argv[5], "%d", &layering_mode)) - die ("Invalid mode %s", argv[5]); + if (!sscanf(argv[7], "%d", &layering_mode)) + die ("Invalid mode %s", argv[7]); if (layering_mode<0 || layering_mode>6) - die ("Invalid mode (0..6) %s", argv[5]); + die ("Invalid mode (0..6) %s", argv[7]); - if (argc != 6+mode_to_num_layers[layering_mode]) + if (argc != 8+mode_to_num_layers[layering_mode]) die ("Invalid number of arguments"); if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) @@ -168,8 +170,14 @@ int main(int argc, char **argv) { cfg.g_w = width; cfg.g_h = height; - for (i=6; i<6+mode_to_num_layers[layering_mode]; i++) - if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-6])) + // Timebase format e.g. 30fps: numerator=1, demoninator=30 + if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) + die ("Invalid timebase numerator %s", argv[5]); + if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) + die ("Invalid timebase denominator %s", argv[6]); + + for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) + if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) die ("Invalid data rate %s", argv[i]); // Real time parameters @@ -193,7 +201,7 @@ int main(int argc, char **argv) { cfg.kf_min_dist = cfg.kf_max_dist = 1000; // Temporal scaling parameters: - // NOTE: The 3 prediction frames cannot be used interchangebly due to + // NOTE: The 3 prediction frames cannot be used interchangeably due to // differences in the way they are handled throughout the code. The // frames should be allocated to layers in the order LAST, GF, ARF. // Other combinations work, but may produce slightly inferior results. @@ -210,14 +218,15 @@ int main(int argc, char **argv) { cfg.ts_rate_decimator[1] = 1; memcpy(cfg.ts_layer_id, ids, sizeof(ids)); +#if 1 // 0=L, 1=GF, Intra-layer prediction enabled layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; -#if 0 - // 0=L, 1=GF, Intra-layer 1 prediction disabled +#else + // 0=L, 1=GF, Intra-layer prediction disabled layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; @@ -275,7 +284,7 @@ int main(int argc, char **argv) { case 3: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -295,13 +304,12 @@ int main(int argc, char **argv) { VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; - cfg.ts_rate_decimator[2] = 1; } case 4: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -326,7 +334,7 @@ int main(int argc, char **argv) { case 5: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -417,7 +425,7 @@ int main(int argc, char **argv) { flags = layer_flags[frame_cnt % cfg.ts_periodicity]; frame_avail = read_frame(infile, &raw); - if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, + if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, VPX_DL_REALTIME)) die_codec(&codec, "Failed to encode frame"); @@ -446,6 +454,7 @@ int main(int argc, char **argv) { fflush (stdout); } frame_cnt++; + pts += frame_duration; } printf ("\n"); fclose (infile); @@ -63,6 +63,7 @@ enum vp8_postproc_level VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */ VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */ VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */ + VP8_MFQE = 1<<10, }; /*!\brief post process flags diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h index 1d9d53165..86610358c 100644 --- a/vpx/vp8dx.h +++ b/vpx/vp8dx.h @@ -55,6 +55,11 @@ enum vp8_dec_control_id /** check if the indicated frame is corrupted */ VP8D_GET_FRAME_CORRUPTED, + /** control function to get info on which reference frames were used + * by the last decode + */ + VP8D_GET_LAST_REF_USED, + VP8_DECODER_CTRL_ID_MAX } ; @@ -69,7 +74,7 @@ enum vp8_dec_control_id VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) - +VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) /*! @} - end defgroup vp8_decoder */ diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h index c7ccc0510..f2fb08954 100644 --- a/vpx_ports/vpxtypes.h +++ b/vpx_ports/vpxtypes.h @@ -96,11 +96,6 @@ typedef unsigned __int64 vpxu64; # define PRId64 "lld" # define VPX64 PRId64 typedef long vpxs64; -#elif defined(__SYMBIAN32__) -# undef PRId64 -# define PRId64 "u" -# define VPX64 PRId64 -typedef unsigned int vpxs64; #else # error "64 bit integer type undefined for this platform!" #endif diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm deleted file mode 100644 index e495184e7..000000000 --- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm +++ /dev/null @@ -1,774 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |horizontal_line_4_5_scale_armv4| - EXPORT |vertical_band_4_5_scale_armv4| - EXPORT |horizontal_line_2_3_scale_armv4| - EXPORT |vertical_band_2_3_scale_armv4| - EXPORT |horizontal_line_3_5_scale_armv4| - EXPORT |vertical_band_3_5_scale_armv4| - EXPORT |horizontal_line_3_4_scale_armv4| - EXPORT |vertical_band_3_4_scale_armv4| - EXPORT |horizontal_line_1_2_scale_armv4| - EXPORT |vertical_band_1_2_scale_armv4| - - AREA |.text|, CODE, READONLY ; name this block of code - -src RN r0 -srcw RN r1 -dest RN r2 -mask RN r12 -c51_205 RN r10 -c102_154 RN r11 -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_4_5_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 4 to 5. -; * -; * SPECIAL NOTES : None. -; * -; ****************************************************************************/ -;void horizontal_line_4_5_scale_armv4 -;( -; r0 = UINT8 *source -; r1 = UINT32 source_width -; r2 = UINT8 *dest -; r3 = UINT32 dest_width -;) -|horizontal_line_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - mov mask, #255 ; mask for selection - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldr r3, [src], #4 - -hl45_loop - - and r4, r3, mask ; a = src[0] - and r5, mask, r3, lsr #8 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - and r7, mask, r3, lsr #16 ; c = src[2] - mul r6, c51_205, r6 ; a * 51 + 205 * b - - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - and r8, mask, r3, lsr #24 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - ldr r3, [src], #4 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - and r9, mask, r3 ; e = src[4] - orr r9, r9, r8, lsl #16 ; d | e - mul r9, c51_205, r9 ; d * 205 + 51 * e - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #4 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bne hl45_loop - - and r4, r3, mask - and r5, mask, r3, lsl #8 - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 - - and r7, mask, r3, lsl #16 - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 - add r6, r6, #0x8000 - and r8, mask, r3, lsl #24 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - ldrb r3, [src] - strb r3, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_4_5_scale_c| - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_4_5_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The -; * height of the band scaled is 4-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl45_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 ; a * 51 + 205 * b - - ldrb r8, [r3], r1 ; d = des[dest_pitch*3] - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - orr r7, r8, r7, lsl #16 ; c | d - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - ldrb r9, [r3, r1] ; e = des [dest_pitch * 5] - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - orr r9, r9, r8, lsl #16 ; d | e - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - mul r9, c51_205, r9 ; d * 205 + 51 * e - add r7, r7, #0x8000 - add src, src, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - add r9, r9, #0x8000 - subs r2, r2, #1 - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - bne vl45_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_4_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_2_3_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 2 to 3. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_2_3_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - ldr lr, =85 - ldr r12, =171 - -hl23_loop - - ldrb r3, [src], #1 ; a - ldrb r4, [src], #1 ; b - ldrb r5, [src] ; c - - strb r3, [dest], #1 - mul r4, r12, r4 ; b * 171 - mla r6, lr, r3, r4 ; a * 85 - mla r7, lr, r5, r4 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest], #1 - - add r7, r7, #128 - mov r7, r7, lsr #8 - strb r7, [dest], #1 - - subs srcw, srcw, #2 - bne hl23_loop - - ldrb r4, [src, #1] ; b - strb r5, [dest], #1 - strb r4, [dest, #1] - - mul r4, r12, r4 ; b * 171 - mla r6, lr, r5, r4 ; a * 85 + b *171 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest] - - ldmia sp!, {r4 - r11, pc} - ENDP ;|horizontal_line_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_2_3_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The -; * height of the band scaled is 2-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_2_3_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r8, lr} - ldr lr, =85 - ldr r12, =171 - add r3, r1, r1, lsl #1 ; 3 * dest_pitch - -vl23_loop - ldrb r4, [src] ; a = des [0] - ldrb r5, [src, r1] ; b = des [dest_pitch] - ldrb r7, [src, r3] ; c = des [dest_pitch*3] - subs r2, r2, #1 - - mul r5, r12, r5 ; b * 171 - mla r6, lr, r4, r5 ; a * 85 - mla r8, lr, r7, r5 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [src, r1] - - add r8, r8, #128 - mov r8, r8, lsr #8 - strb r8, [src, r1, lsl #1] - - add src, src, #1 - - bne vl23_loop - - ldmia sp!, {r4 - r8, pc} - ENDP ;|vertical_band_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_3_5_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 5. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_3_5_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldrb r4, [src], #1 ; a = src[0] - -hl35_loop - - ldrb r8, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - ldrb r4, [src], #1 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - orr r9, r4, r9, lsl #16 ; c | d - mul r9, c102_154, r9 ; c * 154 + 102 * d - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #3 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bpl hl35_loop - - ldrb r5, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - strb r9, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_5_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_3_5_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl35_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r8, r4, r5, lsl #16 ; b | a - mul r6, c102_154, r8 ; a * 102 + 154 * b - - ldrb r8, [r3, r1, lsl #1] ; d = des[dest_pitch*5] - orr r3, r7, r5, lsl #16 ; b | c - mul r9, c51_205, r3 ; b * 205 + 51 * c - add r6, r6, #0x8000 - orr r3, r5, r7, lsl #16 ; c | b - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - mul r5, c51_205, r3 ; c * 205 + 154 * b - add r9, r9, #0x8000 - orr r3, r8, r7, lsl #16 ; c | d - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - mul r7, c102_154, r3 ; c * 154 + 102 * d - add r5, r5, #0x8000 - add src, src, #1 - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - add r7, r7, #0x8000 - subs r2, r2, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - - bne vl35_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_3_4_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 4. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_3_4_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - - ldrb r4, [src], #1 ; a = src[0] - -hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - strb r8, [dest], #1 - - ldrb r4, [src], #1 ; [a+1] - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - - subs srcw, srcw, #3 - - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - strb r7, [dest], #1 - - bpl hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - strb r8, [dest], #1 - strb r7, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_4_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_3_4_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_3_4_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - -; ldr r1,[r1] -vl34_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des [dest_pitch*2] - add lr, src, r1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r5, r4 ; a*64 + b*192 + 1 - - add r5, r5, #1 ; b + 1 - add r5, r5, r7 ; b + c + 1 - mov r5, r5, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [lr], r1 - - ldrb r4, [r3, r1] ; a = des [dest_pitch*4] - - strb r5, [lr], r1 - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - - add src, src, #1 - subs r2, r2, #1 - - strb r7, [lr] - - bne vl34_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_4_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_1_2_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 1 to 2. -; * -; * SPECIAL NOTES : None. -; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_1_2_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r5, lr} - - sub srcw, srcw, #1 - - ldrb r3, [src], #1 - ldrb r4, [src], #1 -hl12_loop - subs srcw, srcw, #1 - - add r5, r3, r4 - add r5, r5, #1 - mov r5, r5, lsr #1 - - orr r5, r3, r5, lsl #8 - strh r5, [dest], #2 - - mov r3, r4 - - ldrneb r4, [src], #1 - bne hl12_loop - - orr r5, r4, r4, lsl #8 - strh r5, [dest] - - ldmia sp!, {r4 - r5, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_1_2_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The -; * height of the band scaled is 1-pixel. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vp8cx_vertical_band_1_2_scale_c -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r7, lr} - - ldr mask, =0xff00ff ; mask for selection - ldr lr, = 0x010001 - -vl12_loop - mov r3, src - ldr r4, [r3], r1 - ldr r5, [r3, r1] - - add src, src, #4 - subs r2, r2, #4 - - and r6, r4, mask - and r7, r5, mask - - add r6, r7, r6 - add r6, r6, lr - - and r4, mask, r4, lsr #8 - and r5, mask, r5, lsr #8 - - mov r6, r6, lsr #1 - and r6, r6, mask - - add r4, r5, r4 - add r4, r4, lr - - mov r4, r4, lsr #1 - and r4, r4, mask - - orr r5, r6, r4, lsl #8 - - str r5, [r3] - - bpl vl12_loop - - ldmia sp!, {r4 - r7, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - - END diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c index 13c9122f0..29b130876 100644 --- a/vpx_scale/generic/vpxscale.c +++ b/vpx_scale/generic/vpxscale.c @@ -27,7 +27,6 @@ /**************************************************************************** * Exports ****************************************************************************/ -#ifndef VPX_NO_GLOBALS void (*vp8_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0; void (*vp8_last_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0; void (*vp8_vertical_band_2_3_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) = 0; @@ -51,9 +50,6 @@ void (*vp8_vertical_band_2_1_scale_i)(unsigned char *source, unsigned int src_pi void (*vp8_horizontal_line_2_1_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0; void (*vp8_horizontal_line_5_3_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0; void (*vp8_horizontal_line_5_4_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) = 0; -#else -# include "vpxscale_nofp.h" -#endif typedef struct { diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h deleted file mode 100644 index 3e1a9fa83..000000000 --- a/vpx_scale/include/arm/vpxscale_nofp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_3_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); - -void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -#define vp8_vertical_band_4_5_scale vertical_band_4_5_scale_armv4 -#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c -#define vp8_vertical_band_2_3_scale vertical_band_2_3_scale_armv4 -#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c -#define vp8_vertical_band_3_5_scale vertical_band_3_5_scale_armv4 -#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c -#define vp8_vertical_band_3_4_scale vertical_band_3_4_scale_armv4 -#define vp8_last_vertical_band_3_4_scale vp8cx_last_vertical_band_3_4_scale_c -#define vp8_horizontal_line_1_2_scale horizontal_line_1_2_scale_armv4 -#define vp8_horizontal_line_3_5_scale horizontal_line_3_5_scale_armv4 -#define vp8_horizontal_line_3_4_scale horizontal_line_3_4_scale_armv4 -#define vp8_horizontal_line_4_5_scale horizontal_line_4_5_scale_armv4 -#define vp8_horizontal_line_2_3_scale horizontal_line_2_3_scale_armv4 -#define vp8_vertical_band_1_2_scale vertical_band_1_2_scale_armv4 -#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c -#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c -#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c -#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c -#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c -#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c -#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c -#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h deleted file mode 100644 index 7b8205a1b..000000000 --- a/vpx_scale/include/generic/vpxscale_nofp.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); - -#define vp8_vertical_band_4_5_scale vp8cx_vertical_band_4_5_scale_c -#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c -#define vp8_vertical_band_2_3_scale vp8cx_vertical_band_2_3_scale_c -#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c -#define vp8_vertical_band_3_5_scale vp8cx_vertical_band_3_5_scale_c -#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c -#define vp8_horizontal_line_1_2_scale vp8cx_horizontal_line_1_2_scale_c -#define vp8_horizontal_line_3_5_scale vp8cx_horizontal_line_3_5_scale_c -#define vp8_horizontal_line_4_5_scale vp8cx_horizontal_line_4_5_scale_c -#define vp8_horizontal_line_2_3_scale vp8cx_horizontal_line_2_3_scale_c -#define vp8_vertical_band_1_2_scale vp8cx_vertical_band_1_2_scale_c -#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c -#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c -#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c -#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c -#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c -#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c -#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c -#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h deleted file mode 100644 index 3e1a9fa83..000000000 --- a/vpx_scale/include/symbian/vpxscale_nofp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_1_2_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_3_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_3_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_2_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_4_5_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vp8cx_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vp8cx_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); - -void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -#define vp8_vertical_band_4_5_scale vertical_band_4_5_scale_armv4 -#define vp8_last_vertical_band_4_5_scale vp8cx_last_vertical_band_4_5_scale_c -#define vp8_vertical_band_2_3_scale vertical_band_2_3_scale_armv4 -#define vp8_last_vertical_band_2_3_scale vp8cx_last_vertical_band_2_3_scale_c -#define vp8_vertical_band_3_5_scale vertical_band_3_5_scale_armv4 -#define vp8_last_vertical_band_3_5_scale vp8cx_last_vertical_band_3_5_scale_c -#define vp8_vertical_band_3_4_scale vertical_band_3_4_scale_armv4 -#define vp8_last_vertical_band_3_4_scale vp8cx_last_vertical_band_3_4_scale_c -#define vp8_horizontal_line_1_2_scale horizontal_line_1_2_scale_armv4 -#define vp8_horizontal_line_3_5_scale horizontal_line_3_5_scale_armv4 -#define vp8_horizontal_line_3_4_scale horizontal_line_3_4_scale_armv4 -#define vp8_horizontal_line_4_5_scale horizontal_line_4_5_scale_armv4 -#define vp8_horizontal_line_2_3_scale horizontal_line_2_3_scale_armv4 -#define vp8_vertical_band_1_2_scale vertical_band_1_2_scale_armv4 -#define vp8_last_vertical_band_1_2_scale vp8cx_last_vertical_band_1_2_scale_c -#define vp8_vertical_band_5_4_scale vp8cx_vertical_band_5_4_scale_c -#define vp8_vertical_band_5_3_scale vp8cx_vertical_band_5_3_scale_c -#define vp8_vertical_band_2_1_scale vp8cx_vertical_band_2_1_scale_c -#define vp8_vertical_band_2_1_scale_i vp8cx_vertical_band_2_1_scale_i_c -#define vp8_horizontal_line_2_1_scale vp8cx_horizontal_line_2_1_scale_c -#define vp8_horizontal_line_5_3_scale vp8cx_horizontal_line_5_3_scale_c -#define vp8_horizontal_line_5_4_scale vp8cx_horizontal_line_5_4_scale_c diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h deleted file mode 100644 index a704bd92c..000000000 --- a/vpx_scale/include/vpxscale_nofp.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if defined(__S60_V20__) || defined(__SYMBIAN32__) && !defined(__WINS__) -#include "symbian\vpxscale_nofp.h" -#else -#include "generic\vpxscale_nofp.h" -#endif diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h index a13a65f57..1a4997c55 100644 --- a/vpx_scale/vpxscale.h +++ b/vpx_scale/vpxscale.h @@ -61,19 +61,6 @@ extern void (*vp8_horizontal_line_2_1_scale)(const unsigned char *source, unsign extern void (*vp8_horizontal_line_5_3_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); extern void (*vp8_horizontal_line_5_4_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_4_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_2_3_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_5_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_3_4_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void horizontal_line_1_2_scale_armv4(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -void vertical_band_4_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_2_3_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_5_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_3_4_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - - -extern void dmachine_specific_config(int mmx_enabled, int xmm_enabled, int wmt_enabled); extern void vp8_yv12_scale_or_center ( YV12_BUFFER_CONFIG *src_yuv_config, @@ -124,11 +124,13 @@ static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1, "Display only selected block modes"); static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1, "Draw only selected motion vectors"); +static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0, + "Enable multiframe quality enhancement"); static const arg_def_t *vp8_pp_args[] = { &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, - &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, + &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe, NULL }; #endif @@ -803,6 +805,11 @@ int main(int argc, const char **argv_) postproc = 1; vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK; } + else if (arg_match(&arg, &mfqe, argi)) + { + postproc = 1; + vp8_pp_cfg.post_proc_flag |= VP8_MFQE; + } else if (arg_match(&arg, &pp_debug_info, argi)) { unsigned int level = arg_parse_uint(&arg); |