summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--vp8/common/generic/systemdependent.c6
-rw-r--r--vp8/common/recon.h25
-rw-r--r--vp8/common/reconinter.c104
-rw-r--r--vp8/common/reconinter.h1
-rw-r--r--vp8/common/reconintra.h5
-rw-r--r--vp8/common/reconintra4x4.c2
-rw-r--r--vp8/common/x86/recon_sse2.asm408
-rw-r--r--vp8/common/x86/recon_wrapper_sse2.c90
-rw-r--r--vp8/common/x86/recon_x86.h22
-rw-r--r--vp8/common/x86/x86_systemdependent.c9
-rw-r--r--vp8/decoder/dboolhuff.h50
-rw-r--r--vp8/decoder/decodframe.c7
-rw-r--r--vp8/encoder/encodeframe.c2
-rw-r--r--vp8/encoder/encodeintra.c5
-rw-r--r--vp8/encoder/onyx_if.c85
-rw-r--r--vp8/encoder/onyx_int.h3
-rw-r--r--vp8/encoder/pickinter.c3
-rw-r--r--vp8/encoder/ratectrl.c219
-rw-r--r--vp8/encoder/ratectrl.h5
-rw-r--r--vp8/encoder/rdopt.c6
-rw-r--r--vp8/vp8_common.mk1
-rw-r--r--vpxdec.c15
-rw-r--r--vpxenc.c2
23 files changed, 716 insertions, 359 deletions
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 2f8997953..fea6dcd23 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -43,6 +43,12 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_build_intra_predictors_mby;
rtcd->recon.build_intra_predictors_mby_s =
vp8_build_intra_predictors_mby_s;
+ rtcd->recon.build_intra_predictors_mbuv =
+ vp8_build_intra_predictors_mbuv;
+ rtcd->recon.build_intra_predictors_mbuv_s =
+ vp8_build_intra_predictors_mbuv_s;
+ rtcd->recon.intra4x4_predict =
+ vp8_intra4x4_predict;
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index e608f218c..7cfc779cd 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -26,6 +26,9 @@
#define prototype_build_intra_predictors(sym) \
void sym(MACROBLOCKD *x)
+#define prototype_intra4x4_predict(sym) \
+ void sym(BLOCKD *x, int b_mode, unsigned char *predictor)
+
struct vp8_recon_rtcd_vtable;
#if ARCH_X86 || ARCH_X86_64
@@ -88,11 +91,30 @@ extern prototype_build_intra_predictors\
extern prototype_build_intra_predictors\
(vp8_recon_build_intra_predictors_mby_s);
+#ifndef vp8_recon_build_intra_predictors_mbuv
+#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv
+#endif
+extern prototype_build_intra_predictors\
+ (vp8_recon_build_intra_predictors_mbuv);
+
+#ifndef vp8_recon_build_intra_predictors_mbuv_s
+#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s
+#endif
+extern prototype_build_intra_predictors\
+ (vp8_recon_build_intra_predictors_mbuv_s);
+
+#ifndef vp8_recon_intra4x4_predict
+#define vp8_recon_intra4x4_predict vp8_intra4x4_predict
+#endif
+extern prototype_intra4x4_predict\
+ (vp8_recon_intra4x4_predict);
+
typedef prototype_copy_block((*vp8_copy_block_fn_t));
typedef prototype_recon_block((*vp8_recon_fn_t));
typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
+typedef prototype_intra4x4_predict((*vp8_intra4x4_pred_fn_t));
typedef struct vp8_recon_rtcd_vtable
{
vp8_copy_block_fn_t copy16x16;
@@ -105,6 +127,9 @@ typedef struct vp8_recon_rtcd_vtable
vp8_recon_mb_fn_t recon_mby;
vp8_build_intra_pred_fn_t build_intra_predictors_mby_s;
vp8_build_intra_pred_fn_t build_intra_predictors_mby;
+ vp8_build_intra_pred_fn_t build_intra_predictors_mbuv_s;
+ vp8_build_intra_pred_fn_t build_intra_predictors_mbuv;
+ vp8_intra4x4_pred_fn_t intra4x4_predict;
} vp8_recon_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 6862bae11..80b17acb6 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -543,107 +543,3 @@ void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x)
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
}
}
-void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
-{
- unsigned char *dst_ptr = x->dst.y_buffer;
-
- if (x->mode_info_context->mbmi.mode != SPLITMV)
- {
- vp8_build_inter16x16_predictors_mb_s(x);
- }
- else
- {
- /* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
- * if sth is wrong, go back to what it is in build_inter_predictors_mb.
- */
- int i;
-
- if (x->mode_info_context->mbmi.partitioning < 3)
- {
- for (i = 0; i < 4; i++)
- {
- unsigned char *ptr_base;
- unsigned char *ptr;
- BLOCKD *d = &x->block[bbb[i]];
-
- ptr_base = *(d->base_pre);
- ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
-
- if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
- {
- x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- }
- }
- else
- {
- for (i = 0; i < 16; i += 2)
- {
- BLOCKD *d0 = &x->block[i];
- BLOCKD *d1 = &x->block[i+1];
-
- if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
- {
- /*build_inter_predictors2b(x, d0, 16);*/
- unsigned char *ptr_base;
- unsigned char *ptr;
-
- ptr_base = *(d0->base_pre);
- ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
-
- if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
- {
- x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
- }
- }
- else
- {
- vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
- vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
- }
- }
- }
-
- for (i = 16; i < 24; i += 2)
- {
- BLOCKD *d0 = &x->block[i];
- BLOCKD *d1 = &x->block[i+1];
-
- if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
- {
- /*build_inter_predictors2b(x, d0, 8);*/
- unsigned char *ptr_base;
- unsigned char *ptr;
-
- ptr_base = *(d0->base_pre);
- ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
-
- if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
- {
- x->subpixel_predict8x4(ptr, d0->pre_stride,
- d0->bmi.mv.as_mv.col & 7,
- d0->bmi.mv.as_mv.row & 7,
- dst_ptr, x->dst.uv_stride);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
- d0->pre_stride, dst_ptr, x->dst.uv_stride);
- }
- }
- else
- {
- vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
- vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
- }
- }
- }
-}
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 688bebe96..bdf49c9d0 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -13,7 +13,6 @@
#define __INC_RECONINTER_H
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
-extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index 4025a5307..47e479285 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -14,9 +14,4 @@
extern void init_intra_left_above_pixels(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x);
-
-extern void vp8_predict_intra4x4(BLOCKD *x, int b_mode, unsigned char *Predictor);
-
#endif
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 8ddae0059..12e2e60c7 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -14,7 +14,7 @@
#include "vpx_mem/vpx_mem.h"
#include "reconintra.h"
-void vp8_predict_intra4x4(BLOCKD *x,
+void vp8_intra4x4_predict(BLOCKD *x,
int b_mode,
unsigned char *predictor)
{
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 67b6420a9..a1cc2b1fe 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -229,3 +229,411 @@ sym(vp8_copy_mem16x16_sse2):
UNSHADOW_ARGS
pop rbp
ret
+
+
+;void vp8_intra_pred_uv_dc_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_dc_mmx2)
+sym(vp8_intra_pred_uv_dc_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from top
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax
+ pxor mm0, mm0
+ movd mm1, [rsi]
+ movd mm2, [rsi+4]
+ punpcklbw mm1, mm0
+ punpcklbw mm2, mm0
+ paddw mm1, mm2
+ pshufw mm2, mm1, 0x0e
+ paddw mm1, mm2
+ pshufw mm2, mm1, 0x01
+ paddw mm1, mm2
+
+ ; from left
+ dec rsi
+ lea rdi, [rax*3]
+ movzx ecx, byte [rsi+rax]
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*4]
+ add ecx, edx
+
+ ; add up
+ pextrw edx, mm1, 0x0
+ lea edx, [edx+ecx+8]
+ sar edx, 4
+ movd mm1, edx
+ pshufw mm1, mm1, 0x0
+ packuswb mm1, mm1
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_uv_dctop_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_dctop_mmx2)
+sym(vp8_intra_pred_uv_dctop_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from top
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax
+ pxor mm0, mm0
+ movd mm1, [rsi]
+ movd mm2, [rsi+4]
+ punpcklbw mm1, mm0
+ punpcklbw mm2, mm0
+ paddw mm1, mm2
+ pshufw mm2, mm1, 0x0e
+ paddw mm1, mm2
+ pshufw mm2, mm1, 0x01
+ paddw mm1, mm2
+
+ ; add up
+ paddw mm1, [GLOBAL(dc_4)]
+ psraw mm1, 3
+ pshufw mm1, mm1, 0x0
+ packuswb mm1, mm1
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_uv_dcleft_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_dcleft_mmx2)
+sym(vp8_intra_pred_uv_dcleft_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; from left
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ dec rsi
+ lea rdi, [rax*3]
+ movzx ecx, byte [rsi]
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ add ecx, edx
+ lea rsi, [rsi+rax*4]
+ movzx edx, byte [rsi]
+ add ecx, edx
+ movzx edx, byte [rsi+rax]
+ add ecx, edx
+ movzx edx, byte [rsi+rax*2]
+ add ecx, edx
+ movzx edx, byte [rsi+rdi]
+ lea edx, [ecx+edx+4]
+
+ ; add up
+ shr edx, 3
+ movd mm1, edx
+ pshufw mm1, mm1, 0x0
+ packuswb mm1, mm1
+
+ ; write out
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ lea rax, [rcx*3]
+
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+ lea rdi, [rdi+rcx*4]
+ movq [rdi ], mm1
+ movq [rdi+rcx ], mm1
+ movq [rdi+rcx*2], mm1
+ movq [rdi+rax ], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_uv_dc128_mmx(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_dc128_mmx)
+sym(vp8_intra_pred_uv_dc128_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ ; end prolog
+
+ ; write out
+ movq mm1, [GLOBAL(dc_128)]
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3]
+
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+ lea rax, [rax+rdx*4]
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_uv_tm_{sse2,ssse3}(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+%macro vp8_intra_pred_uv_tm 1
+global sym(vp8_intra_pred_uv_tm_%1)
+sym(vp8_intra_pred_uv_tm_%1):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; read top row
+ mov edx, 4
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ sub rsi, rax
+ pxor xmm0, xmm0
+%ifidn %1, ssse3
+ movdqa xmm2, [GLOBAL(dc_1024)]
+%endif
+ movq xmm1, [rsi]
+ punpcklbw xmm1, xmm0
+
+ ; set up left ptrs and subtract topleft
+ movd xmm3, [rsi-1]
+ lea rsi, [rsi+rax-1]
+%ifidn %1, sse2
+ punpcklbw xmm3, xmm0
+ pshuflw xmm3, xmm3, 0x0
+ punpcklqdq xmm3, xmm3
+%else
+ pshufb xmm3, xmm2
+%endif
+ psubw xmm1, xmm3
+
+ ; set up dest ptrs
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+
+vp8_intra_pred_uv_tm_%1_loop:
+ movd xmm3, [rsi]
+ movd xmm5, [rsi+rax]
+%ifidn %1, sse2
+ punpcklbw xmm3, xmm0
+ punpcklbw xmm5, xmm0
+ pshuflw xmm3, xmm3, 0x0
+ pshuflw xmm5, xmm5, 0x0
+ punpcklqdq xmm3, xmm3
+ punpcklqdq xmm5, xmm5
+%else
+ pshufb xmm3, xmm2
+ pshufb xmm5, xmm2
+%endif
+ paddw xmm3, xmm1
+ paddw xmm5, xmm1
+ packuswb xmm3, xmm5
+ movq [rdi ], xmm3
+ movhps[rdi+rcx], xmm3
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz vp8_intra_pred_uv_tm_%1_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+%endmacro
+
+vp8_intra_pred_uv_tm sse2
+vp8_intra_pred_uv_tm ssse3
+
+;void vp8_intra_pred_uv_ve_mmx(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_ve_mmx)
+sym(vp8_intra_pred_uv_ve_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ ; end prolog
+
+ ; read from top
+ mov rax, arg(2) ;src;
+ movsxd rdx, dword ptr arg(3) ;src_stride;
+ sub rax, rdx
+ movq mm1, [rax]
+
+ ; write out
+ mov rax, arg(0) ;dst;
+ movsxd rdx, dword ptr arg(1) ;dst_stride
+ lea rcx, [rdx*3]
+
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+ lea rax, [rax+rdx*4]
+ movq [rax ], mm1
+ movq [rax+rdx ], mm1
+ movq [rax+rdx*2], mm1
+ movq [rax+rcx ], mm1
+
+ ; begin epilog
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp8_intra_pred_uv_ho_mmx2(
+; unsigned char *dst,
+; int dst_stride,
+; unsigned char *src,
+; int src_stride
+; )
+global sym(vp8_intra_pred_uv_ho_mmx2)
+sym(vp8_intra_pred_uv_ho_mmx2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; read from left and write out
+ mov edx, 4
+ mov rsi, arg(2) ;src;
+ movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rdi, arg(0) ;dst;
+ movsxd rcx, dword ptr arg(1) ;dst_stride
+ dec rsi
+vp8_intra_pred_uv_ho_mmx2_loop:
+ movd mm0, [rsi]
+ movd mm1, [rsi+rax]
+ punpcklbw mm0, mm0
+ punpcklbw mm1, mm1
+ pshufw mm0, mm0, 0x0
+ pshufw mm1, mm1, 0x0
+ movq [rdi ], mm0
+ movq [rdi+rcx], mm1
+ lea rsi, [rsi+rax*2]
+ lea rdi, [rdi+rcx*2]
+ dec edx
+ jnz vp8_intra_pred_uv_ho_mmx2_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+dc_128:
+ times 8 db 128
+dc_4:
+ times 4 dw 4
+align 16
+dc_1024:
+ times 8 dw 0x400
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
new file mode 100644
index 000000000..7b17851b5
--- /dev/null
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "vp8/common/recon.h"
+#include "recon_x86.h"
+#include "vpx_mem/vpx_mem.h"
+
+#define build_intra_predictors_mbuv_prototype(sym) \
+ void sym(unsigned char *dst, int dst_stride, \
+ const unsigned char *src, int src_stride)
+typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
+
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
+
+static inline void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int dst_stride,
+ build_intra_predictors_mbuv_fn_t tm_func)
+{
+ int mode = x->mode_info_context->mbmi.uv_mode;
+ build_intra_predictors_mbuv_fn_t fn;
+ int src_stride = x->dst.uv_stride;
+
+ switch (mode) {
+ case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
+ case H_PRED: fn = vp8_intra_pred_uv_ho_mmx2; break;
+ case TM_PRED: fn = tm_func; break;
+ case DC_PRED:
+ if (x->up_available) {
+ if (x->left_available) {
+ fn = vp8_intra_pred_uv_dc_mmx2; break;
+ } else {
+ fn = vp8_intra_pred_uv_dctop_mmx2; break;
+ }
+ } else if (x->left_available) {
+ fn = vp8_intra_pred_uv_dcleft_mmx2; break;
+ } else {
+ fn = vp8_intra_pred_uv_dc128_mmx; break;
+ }
+ break;
+ default: return;
+ }
+
+ fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
+ fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
+}
+
+void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
+ &x->predictor[320], 8,
+ vp8_intra_pred_uv_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
+ &x->predictor[320], 8,
+ vp8_intra_pred_uv_tm_ssse3);
+}
+
+void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
+ x->dst.v_buffer, x->dst.uv_stride,
+ vp8_intra_pred_uv_tm_sse2);
+}
+
+void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
+{
+ vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
+ x->dst.v_buffer, x->dst.uv_stride,
+ vp8_intra_pred_uv_tm_ssse3);
+}
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index 40ee65a12..fe0f8f0bc 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -46,6 +46,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx);
extern prototype_recon_block(vp8_recon2b_sse2);
extern prototype_recon_block(vp8_recon4b_sse2);
extern prototype_copy_block(vp8_copy_mem16x16_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon2
@@ -57,6 +59,26 @@ extern prototype_copy_block(vp8_copy_mem16x16_sse2);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_sse2
+#undef vp8_recon_build_intra_predictors_mbuv
+#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_sse2
+
+#undef vp8_recon_build_intra_predictors_mbuv_s
+#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
+
+#endif
+#endif
+
+#if HAVE_SSSE3
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
+extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_recon_build_intra_predictors_mbuv
+#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_ssse3
+
+#undef vp8_recon_build_intra_predictors_mbuv_s
+#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
+
#endif
#endif
#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index e89c07a4f..17667330a 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -88,6 +88,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->recon.recon2 = vp8_recon2b_sse2;
rtcd->recon.recon4 = vp8_recon4b_sse2;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_sse2;
+ rtcd->recon.build_intra_predictors_mbuv =
+ vp8_build_intra_predictors_mbuv_sse2;
+ rtcd->recon.build_intra_predictors_mbuv_s =
+ vp8_build_intra_predictors_mbuv_s_sse2;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
@@ -126,6 +130,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3;
+
+ rtcd->recon.build_intra_predictors_mbuv =
+ vp8_build_intra_predictors_mbuv_ssse3;
+ rtcd->recon.build_intra_predictors_mbuv_s =
+ vp8_build_intra_predictors_mbuv_s_ssse3;
}
#endif
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index a83e3f012..5f6b211ea 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -51,19 +51,26 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
do \
{ \
- int shift; \
- for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+ int shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); \
+ int loop_end, x; \
+ size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \
+ \
+ x = shift + CHAR_BIT - bits_left; \
+ loop_end = 0; \
+ if(x >= 0) \
{ \
- if((_bufptr) >= (_bufend)) { \
- (_count) = VP8_LOTS_OF_BITS; \
- break; \
- } \
- (_count) += 8; \
+ (_count) += VP8_LOTS_OF_BITS; \
+ loop_end = x; \
+ if(!bits_left) break; \
+ } \
+ while(shift >= loop_end) \
+ { \
+ (_count) += CHAR_BIT; \
(_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
- shift -= 8; \
+ shift -= CHAR_BIT; \
} \
} \
- while(0)
+ while(0) \
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
@@ -119,18 +126,19 @@ static int vp8_decode_value(BOOL_DECODER *br, int bits)
static int vp8dx_bool_error(BOOL_DECODER *br)
{
- /* Check if we have reached the end of the buffer.
- *
- * Variable 'count' stores the number of bits in the 'value' buffer,
- * minus 8. So if count == 8, there are 16 bits available to be read.
- * Normally, count is filled with 8 and one byte is filled into the
- * value buffer. When we reach the end of the buffer, count is instead
- * filled with VP8_LOTS_OF_BITS, 8 of which represent the last 8 real
- * bits from the bitstream. So the last bit in the bitstream will be
- * represented by count == VP8_LOTS_OF_BITS - 16.
- */
- if ((br->count > VP8_BD_VALUE_SIZE)
- && (br->count <= VP8_LOTS_OF_BITS - 16))
+ /* Check if we have reached the end of the buffer.
+ *
+ * Variable 'count' stores the number of bits in the 'value' buffer, minus
+ * 8. The top byte is part of the algorithm, and the remainder is buffered
+ * to be shifted into it. So if count == 8, the top 16 bits of 'value' are
+ * occupied, 8 for the algorithm and 8 in the buffer.
+ *
+ * When reading a byte from the user's buffer, count is filled with 8 and
+ * one byte is filled into the value buffer. When we reach the end of the
+ * data, count is additionally filled with VP8_LOTS_OF_BITS. So when
+ * count == VP8_LOTS_OF_BITS - 1, the user's data has been exhausted.
+ */
+ if ((br->count > VP8_BD_VALUE_SIZE) && (br->count < VP8_LOTS_OF_BITS))
{
/* We have tried to decode bits after the end of
* stream was encountered.
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 8cf4e4711..782dfefa1 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -113,7 +113,7 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
- vp8_build_intra_predictors_mbuv_s(xd);
+ RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv_s)(xd);
RECON_INVOKE(&pbi->common.rtcd.recon,
build_intra_predictors_mby_s)(xd);
}
@@ -213,7 +213,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
/* do prediction */
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
- vp8_build_intra_predictors_mbuv(xd);
+ RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv)(xd);
if (xd->mode_info_context->mbmi.mode != B_PRED)
{
@@ -264,7 +264,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
BLOCKD *b = &xd->block[i];
- vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+ RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict)
+ (b, b->bmi.mode, b->predictor);
if (xd->eobs[i] > 1)
{
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 98bd983a7..9fc8c1790 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1529,7 +1529,7 @@ int vp8cx_encode_inter_macroblock
}
else
- vp8_build_inter_predictors_mb_s(xd);
+ vp8_build_inter16x16_predictors_mb_s(xd);
}
if (!x->skip)
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 7d835a1cc..9517a1d89 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -32,7 +32,8 @@
#endif
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode)
{
- vp8_predict_intra4x4(b, best_mode, b->predictor);
+ RECON_INVOKE(&rtcd->common->recon, intra4x4_predict)
+ (b, best_mode, b->predictor);
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
@@ -113,7 +114,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
- vp8_build_intra_predictors_mbuv(&x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 8d1429e0b..4a5a3c690 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -56,7 +56,6 @@ extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt
extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val, int sharpness_lvl);
extern void vp8_dmachine_specific_config(VP8_COMP *cpi);
extern void vp8_cmachine_specific_config(VP8_COMP *cpi);
-extern void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi);
extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int filt_lvl, int low_var_thresh, int flag);
extern void print_parms(VP8_CONFIG *ocf, char *filenam);
extern unsigned int vp8_get_processor_freq();
@@ -1508,10 +1507,7 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->auto_gold = 1;
cpi->auto_adjust_gold_quantizer = 1;
- cpi->goldquantizer = 1;
cpi->goldfreq = 7;
- cpi->auto_adjust_key_quantizer = 1;
- cpi->keyquantizer = 1;
cm->version = oxcf->Version;
vp8_setup_version(cm);
@@ -2711,78 +2707,7 @@ static void resize_key_frame(VP8_COMP *cpi)
#endif
}
-// return of 0 means drop frame
-static int pick_frame_size(VP8_COMP *cpi)
-{
- VP8_COMMON *cm = &cpi->common;
-
- // First Frame is a special case
- if (cm->current_video_frame == 0)
- {
-#if !(CONFIG_REALTIME_ONLY)
-
- if (cpi->pass == 2)
- vp8_calc_auto_iframe_target_size(cpi);
-
- else
-#endif
- {
- /* 1 Pass there is no information on which to base size so use
- * bandwidth per second * fraction of the initial buffer
- * level
- */
- cpi->this_frame_target = cpi->oxcf.starting_buffer_level / 2;
-
- if(cpi->this_frame_target > cpi->oxcf.target_bandwidth * 3 / 2)
- cpi->this_frame_target = cpi->oxcf.target_bandwidth * 3 / 2;
- }
-
- // Key frame from VFW/auto-keyframe/first frame
- cm->frame_type = KEY_FRAME;
-
- }
- // Special case for forced key frames
- // The frame sizing here is still far from ideal for 2 pass.
- else if (cm->frame_flags & FRAMEFLAGS_KEY)
- {
- cm->frame_type = KEY_FRAME;
- resize_key_frame(cpi);
- vp8_calc_iframe_target_size(cpi);
- }
- else if (cm->frame_type == KEY_FRAME)
- {
- vp8_calc_auto_iframe_target_size(cpi);
- }
- else
- {
- // INTER frame: compute target frame size
- cm->frame_type = INTER_FRAME;
- vp8_calc_pframe_target_size(cpi);
- // Check if we're dropping the frame:
- if (cpi->drop_frame)
- {
- cpi->drop_frame = FALSE;
- cpi->drop_count++;
- return 0;
- }
- }
-
- /* Apply limits on keyframe target.
- *
- * TODO: move this after consolidating
- * vp8_calc_iframe_target_size() and vp8_calc_auto_iframe_target_size()
- */
- if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_max_intra_bitrate_pct)
- {
- unsigned int max_rate = cpi->av_per_frame_bandwidth
- * cpi->oxcf.rc_max_intra_bitrate_pct / 100;
-
- if (cpi->this_frame_target > max_rate)
- cpi->this_frame_target = max_rate;
- }
- return 1;
-}
static void set_quantizer(VP8_COMP *cpi, int Q)
{
@@ -3581,7 +3506,7 @@ static void encode_frame_to_data_rate
}
// Decide how big to make the frame
- if (!pick_frame_size(cpi))
+ if (!vp8_pick_frame_size(cpi))
{
cm->current_video_frame++;
cpi->frames_since_key++;
@@ -3909,7 +3834,10 @@ static void encode_frame_to_data_rate
}
if (cm->frame_type == KEY_FRAME)
+ {
+ resize_key_frame(cpi);
vp8_setup_key_frame(cpi);
+ }
// transform / motion compensation build reconstruction frame
vp8_encode_frame(cpi);
@@ -3944,11 +3872,11 @@ static void encode_frame_to_data_rate
#else
if (decide_key_frame(cpi))
{
- vp8_calc_auto_iframe_target_size(cpi);
-
// Reset all our sizing numbers and recode
cm->frame_type = KEY_FRAME;
+ vp8_pick_frame_size(cpi);
+
// Clear the Alt reference frame active flag when we have a key frame
cpi->source_alt_ref_active = FALSE;
@@ -3977,7 +3905,6 @@ static void encode_frame_to_data_rate
loop_count++;
Loop = TRUE;
- resize_key_frame(cpi);
continue;
}
#endif
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 57cf4c000..982b24aae 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -505,10 +505,7 @@ typedef struct
int interquantizer;
int auto_gold;
int auto_adjust_gold_quantizer;
- int goldquantizer;
int goldfreq;
- int auto_adjust_key_quantizer;
- int keyquantizer;
int auto_worst_q;
int cpu_used;
int chroma_boost;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 765ff26f6..ea4f01fad 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -194,7 +194,8 @@ static int pick_intra4x4block(
int this_rd;
rate = mode_costs[mode];
- vp8_predict_intra4x4(b, mode, b->predictor);
+ RECON_INVOKE(&rtcd->common->recon, intra4x4_predict)
+ (b, mode, b->predictor);
distortion = get_prediction_error(be, b, &rtcd->variance);
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index fa9f04b15..9d778962a 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -374,62 +374,96 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
cpi->common.refresh_alt_ref_frame = TRUE;
}
-void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
+
+static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
+ double correction_factor)
+{
+ int Bpm = (int)(.5 + correction_factor * vp8_bits_per_mb[frame_kind][Q]);
+
+ /* Attempt to retain reasonable accuracy without overflow. The cutoff is
+ * chosen such that the maximum product of Bpm and MBs fits 31 bits. The
+ * largest Bpm takes 20 bits.
+ */
+ if (MBs > (1 << 11))
+ return (Bpm >> BPER_MB_NORMBITS) * MBs;
+ else
+ return (Bpm * MBs) >> BPER_MB_NORMBITS;
+}
+
+
+static void calc_iframe_target_size(VP8_COMP *cpi)
{
// boost defaults to half second
int kf_boost;
+ int target;
// Clear down mmx registers to allow floating point in what follows
vp8_clear_system_state(); //__asm emms;
if (cpi->oxcf.fixed_q >= 0)
{
- vp8_calc_iframe_target_size(cpi);
- return;
- }
+ int Q = cpi->oxcf.key_q;
- if (cpi->pass == 2)
+ target = estimate_bits_at_q(INTRA_FRAME, Q, cpi->common.MBs,
+ cpi->key_frame_rate_correction_factor);
+ }
+ else if (cpi->pass == 2)
{
- cpi->this_frame_target = cpi->per_frame_bandwidth; // New Two pass RC
+ // New Two pass RC
+ target = cpi->per_frame_bandwidth;
+ }
+ // First Frame is a special case
+ else if (cpi->common.current_video_frame == 0)
+ {
+ /* In one-pass mode there is no prior information on which to base
+ * the size, so use half of the starting buffer level, capped at
+ * 1.5x the target bandwidth.
+ */
+ target = cpi->oxcf.starting_buffer_level / 2;
+
+ if(target > cpi->oxcf.target_bandwidth * 3 / 2)
+ target = cpi->oxcf.target_bandwidth * 3 / 2;
}
else
{
+ // if this keyframe was forced, use a more recent Q estimate
+ int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY)
+ ? cpi->avg_frame_qindex : cpi->ni_av_qi;
+
// Boost depends somewhat on frame rate
kf_boost = (int)(2 * cpi->output_frame_rate - 16);
// adjustment up based on q
- kf_boost = kf_boost * kf_boost_qadjustment[cpi->ni_av_qi] / 100;
+ kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
// frame separation adjustment ( down)
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
- kf_boost = (int)(kf_boost * cpi->frames_since_key / (cpi->output_frame_rate / 2));
+ kf_boost = (int)(kf_boost
+ * cpi->frames_since_key / (cpi->output_frame_rate / 2));
if (kf_boost < 16)
kf_boost = 16;
- // Reset the active worst quality to the baseline value for key frames.
- cpi->active_worst_quality = cpi->worst_quality;
-
- cpi->this_frame_target = ((16 + kf_boost) * cpi->per_frame_bandwidth) >> 4;
+ target = ((16 + kf_boost) * cpi->per_frame_bandwidth) >> 4;
}
- // Should the next frame be an altref frame
- if (cpi->pass != 2)
+ if (cpi->oxcf.rc_max_intra_bitrate_pct)
{
- // For now Alt ref is not allowed except in 2 pass modes.
- cpi->source_alt_ref_pending = FALSE;
+ unsigned int max_rate = cpi->per_frame_bandwidth
+ * cpi->oxcf.rc_max_intra_bitrate_pct / 100;
- /*if ( cpi->oxcf.fixed_q == -1)
- {
- if ( cpi->oxcf.play_alternate && ( (cpi->last_boost/2) > (100+(AF_THRESH*cpi->frames_till_gf_update_due)) ) )
- cpi->source_alt_ref_pending = TRUE;
- else
- cpi->source_alt_ref_pending = FALSE;
- }*/
+ if (target > max_rate)
+ target = max_rate;
}
- if (0)
+ cpi->this_frame_target = target;
+
+ // TODO: if we separate rate targeting from Q targeting, move this.
+ // Reset the active worst quality to the baseline value for key frames.
+ cpi->active_worst_quality = cpi->worst_quality;
+
+#if 0
{
FILE *f;
@@ -442,8 +476,10 @@ void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
fclose(f);
}
+#endif
}
+
// Do the best we can to define the parameteres for the next GF based on what information we have available.
static void calc_gf_params(VP8_COMP *cpi)
{
@@ -609,100 +645,9 @@ static void calc_gf_params(VP8_COMP *cpi)
}*/
}
}
-/* This is equvialent to estimate_bits_at_q without the rate_correction_factor. */
-static int baseline_bits_at_q(int frame_kind, int Q, int MBs)
-{
- int Bpm = vp8_bits_per_mb[frame_kind][Q];
-
- /* Attempt to retain reasonable accuracy without overflow. The cutoff is
- * chosen such that the maximum product of Bpm and MBs fits 31 bits. The
- * largest Bpm takes 20 bits.
- */
- if (MBs > (1 << 11))
- return (Bpm >> BPER_MB_NORMBITS) * MBs;
- else
- return (Bpm * MBs) >> BPER_MB_NORMBITS;
-}
-
-void vp8_calc_iframe_target_size(VP8_COMP *cpi)
-{
- int Q;
- int Boost = 100;
-
- Q = (cpi->oxcf.fixed_q >= 0) ? cpi->oxcf.fixed_q : cpi->avg_frame_qindex;
-
- if (cpi->auto_adjust_key_quantizer == 1)
- {
- // If (auto_adjust_key_quantizer==1) then a lower Q is selected for key-frames.
- // The enhanced Q is calculated so as to boost the key frame size by a factor
- // specified in kf_boost_qadjustment. Also, can adjust based on distance
- // between key frames.
-
- // Adjust boost based upon ambient Q
- Boost = kf_boost_qadjustment[Q];
-
- // Make the Key frame boost less if the seperation from the previous key frame is small
- if (cpi->frames_since_key < 16)
- Boost = Boost * kf_boost_seperation_adjustment[cpi->frames_since_key] / 100;
- else
- Boost = Boost * kf_boost_seperation_adjustment[15] / 100;
-
- // Apply limits on boost
- if (Boost > kf_gf_boost_qlimits[Q])
- Boost = kf_gf_boost_qlimits[Q];
- else if (Boost < 120)
- Boost = 120;
- }
-
- // Keep a record of the boost that was used
- cpi->last_boost = Boost;
-
- // Should the next frame be an altref frame
- if (cpi->pass != 2)
- {
- // For now Alt ref is not allowed except in 2 pass modes.
- cpi->source_alt_ref_pending = FALSE;
-
- /*if ( cpi->oxcf.fixed_q == -1)
- {
- if ( cpi->oxcf.play_alternate && ( (cpi->last_boost/2) > (100+(AF_THRESH*cpi->frames_till_gf_update_due)) ) )
- cpi->source_alt_ref_pending = TRUE;
- else
- cpi->source_alt_ref_pending = FALSE;
- }*/
- }
-
- if (cpi->oxcf.fixed_q >= 0)
- {
- cpi->this_frame_target = (baseline_bits_at_q(0, Q, cpi->common.MBs) * Boost) / 100;
- }
- else
- {
-
- int bits_per_mb_at_this_q ;
-
- if (cpi->oxcf.error_resilient_mode == 1)
- {
- cpi->this_frame_target = 2 * cpi->av_per_frame_bandwidth;
- return;
- }
-
- // Rate targetted scenario:
- // Be careful of 32-bit OVERFLOW if restructuring the caluclation of cpi->this_frame_target
- bits_per_mb_at_this_q = (int)(.5 +
- cpi->key_frame_rate_correction_factor * vp8_bits_per_mb[0][Q]);
-
- cpi->this_frame_target = (((bits_per_mb_at_this_q * cpi->common.MBs) >> BPER_MB_NORMBITS) * Boost) / 100;
-
- // Reset the active worst quality to the baseline value for key frames.
- if (cpi->pass < 2)
- cpi->active_worst_quality = cpi->worst_quality;
- }
-}
-
-void vp8_calc_pframe_target_size(VP8_COMP *cpi)
+static void calc_pframe_target_size(VP8_COMP *cpi)
{
int min_frame_target;
int Adjustment;
@@ -1194,7 +1139,9 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
}
}
else
- cpi->this_frame_target = (baseline_bits_at_q(1, Q, cpi->common.MBs) * cpi->last_boost) / 100;
+ cpi->this_frame_target =
+ (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0)
+ * cpi->last_boost) / 100;
}
// If there is an active ARF at this location use the minimum
@@ -1316,21 +1263,6 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var)
}
}
-static int estimate_bits_at_q(VP8_COMP *cpi, int Q)
-{
- int Bpm = (int)(.5 + cpi->rate_correction_factor * vp8_bits_per_mb[INTER_FRAME][Q]);
-
- /* Attempt to retain reasonable accuracy without overflow. The cutoff is
- * chosen such that the maximum product of Bpm and MBs fits 31 bits. The
- * largest Bpm takes 20 bits.
- */
- if (cpi->common.MBs > (1 << 11))
- return (Bpm >> BPER_MB_NORMBITS) * cpi->common.MBs;
- else
- return (Bpm * cpi->common.MBs) >> BPER_MB_NORMBITS;
-
-}
-
int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
@@ -1614,3 +1546,26 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
}
}
}
+
+
+// Returns 0 if the frame is to be dropped, 1 otherwise.
+int vp8_pick_frame_size(VP8_COMP *cpi)
+{
+ VP8_COMMON *cm = &cpi->common;
+
+ if (cm->frame_type == KEY_FRAME)
+ calc_iframe_target_size(cpi);
+ else
+ {
+ calc_pframe_target_size(cpi);
+
+ // Check if we're dropping the frame:
+ if (cpi->drop_frame)
+ {
+ cpi->drop_frame = FALSE;
+ cpi->drop_count++;
+ return 0;
+ }
+ }
+ return 1;
+}
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 766dfdfce..d4f779677 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -17,11 +17,12 @@ extern void vp8_save_coding_context(VP8_COMP *cpi);
extern void vp8_restore_coding_context(VP8_COMP *cpi);
extern void vp8_setup_key_frame(VP8_COMP *cpi);
-extern void vp8_calc_iframe_target_size(VP8_COMP *cpi);
-extern void vp8_calc_pframe_target_size(VP8_COMP *cpi);
extern void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var);
extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame);
extern void vp8_adjust_key_frame_context(VP8_COMP *cpi);
extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit);
+// Returns 0 if the frame is to be dropped, 1 otherwise.
+extern int vp8_pick_frame_size(VP8_COMP *cpi);
+
#endif
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index dfc9bec95..020c1560a 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -681,7 +681,8 @@ static int rd_pick_intra4x4block(
rate = bmode_costs[mode];
- vp8_predict_intra4x4(b, mode, b->predictor);
+ RECON_INVOKE(&cpi->rtcd.common->recon, intra4x4_predict)
+ (b, mode, b->predictor);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b(be, b);
@@ -870,7 +871,8 @@ void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *r
int this_rd;
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
- vp8_build_intra_predictors_mbuv(&x->e_mbd);
+ RECON_INVOKE(&cpi->rtcd.common->recon, build_intra_predictors_mbuv)
+ (&x->e_mbd);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
x->src.uv_stride);
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 54a7eacab..f06ad42f9 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -89,6 +89,7 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
diff --git a/vpxdec.c b/vpxdec.c
index ca9af1ec1..3869e20ad 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -725,6 +725,7 @@ int main(int argc, const char **argv_)
int vp8_dbg_display_mv = 0;
#endif
struct input_ctx input = {0};
+ int frames_corrupted = 0;
/* Parse command line */
exec_name = argv_[0];
@@ -1018,6 +1019,7 @@ int main(int argc, const char **argv_)
vpx_codec_iter_t iter = NULL;
vpx_image_t *img;
struct vpx_usec_timer timer;
+ int corrupted;
vpx_usec_timer_start(&timer);
@@ -1037,6 +1039,14 @@ int main(int argc, const char **argv_)
++frame_in;
+ if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted))
+ {
+ fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n",
+ vpx_codec_error(&decoder));
+ goto fail;
+ }
+ frames_corrupted += corrupted;
+
if ((img = vpx_codec_get_frame(&decoder, &iter)))
++frame_out;
@@ -1102,6 +1112,9 @@ int main(int argc, const char **argv_)
fprintf(stderr, "\n");
}
+ if (frames_corrupted)
+ fprintf(stderr, "WARNING: %d frames corrupted.\n",frames_corrupted);
+
fail:
if (vpx_codec_destroy(&decoder))
@@ -1120,5 +1133,5 @@ fail:
fclose(infile);
free(argv);
- return EXIT_SUCCESS;
+ return frames_corrupted ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/vpxenc.c b/vpxenc.c
index 14775031b..bdecaef62 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -918,7 +918,7 @@ static const arg_def_t *main_args[] =
&debugmode,
&outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
&best_dl, &good_dl, &rt_dl,
- &verbosearg, &psnrarg, &use_ivf, &framerate,
+ &verbosearg, &psnrarg, &use_ivf,
NULL
};