summary refs log tree commit diff
path: root/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'vp8')
-rw-r--r--  vp8/common/blockd.h                                2
-rw-r--r--  vp8/common/invtrans.c                              5
-rw-r--r--  vp8/common/mbpitch.c                               1
-rw-r--r--  vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm  5
-rw-r--r--  vp8/encoder/arm/neon/fastquantizeb_neon.asm        16
-rw-r--r--  vp8/encoder/arm/quantize_arm.c                     2
-rw-r--r--  vp8/encoder/encodeintra.c                          3
-rw-r--r--  vp8/encoder/encodemb.c                             54
-rw-r--r--  vp8/encoder/quantize.c                             11
-rw-r--r--  vp8/encoder/rdopt.c                                10
-rw-r--r--  vp8/encoder/tokenize.c                             19
-rw-r--r--  vp8/encoder/x86/quantize_sse2.asm                  10
-rw-r--r--  vp8/encoder/x86/quantize_sse4.asm                  4
-rw-r--r--  vp8/encoder/x86/quantize_ssse3.asm                 4
-rw-r--r--  vp8/encoder/x86/x86_csystemdependent.c             22
-rw-r--r--  vp8/vp8_cx_iface.c                                 2
16 files changed, 110 insertions, 60 deletions
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 5012edd61..a90c1c0b6 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -187,7 +187,7 @@ typedef struct
int dst;
int dst_stride;
- int eob;
+ char *eob;
union b_mode_info bmi;
} BLOCKD;
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 7712b59b7..090c19cca 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -15,7 +15,7 @@
void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b,
int pitch)
{
- if (b->eob > 1)
+ if (*b->eob > 1)
{
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch,
*(b->base_dst) + b->dst, b->dst_stride);
@@ -65,6 +65,3 @@ void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD
}
}
-
-
-
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index 054042c0b..11fa3ffa7 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -118,6 +118,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
x->block[r].qcoeff = x->qcoeff + r * 16;
x->block[r].dqcoeff = x->dqcoeff + r * 16;
+ x->block[r].eob = x->eobs + r;
}
}
diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
index ae2f6030d..d61f5d94d 100644
--- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
@@ -102,9 +102,10 @@ loop
bne loop
; PART 2: check position for eob...
+ ldr r11, [sp, #0] ; restore BLOCKD pointer
mov lr, #0 ; init eob
cmp r1, #0 ; coeffs after quantization?
- ldr r11, [sp, #0] ; restore BLOCKD pointer
+ ldr r12, [r11, #vp8_blockd_eob]
beq end ; skip eob calculations if all zero
ldr r0, [r11, #vp8_blockd_qcoeff]
@@ -212,7 +213,7 @@ quant_coeff_1_0
mov lr, #1 ; rc=0, i=0
end
- str lr, [r11, #vp8_blockd_eob]
+ strb lr, [r12]
ldmfd sp!, {r1, r4-r11, pc}
ENDP
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index dcf3c5090..c970cb73e 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -135,17 +135,16 @@
vmovl.u16 q0, d0
vmovl.u16 q10, d20
-
vmax.u32 d0, d0, d1
vmax.u32 d20, d20, d21
vpmax.u32 d0, d0, d0
vpmax.u32 d20, d20, d20
- add r4, r2, #vp8_blockd_eob
- add r5, r3, #vp8_blockd_eob
+ ldr r4, [r2, #vp8_blockd_eob]
+ ldr r5, [r3, #vp8_blockd_eob]
- vst1.32 {d0[0]}, [r4@32]
- vst1.32 {d20[0]}, [r5@32]
+ vst1.8 {d0[0]}, [r4] ; store eob
+ vst1.8 {d20[0]}, [r5] ; store eob
vldmia sp!, {q4-q7}
ldmfd sp!, {r4-r9}
@@ -196,6 +195,8 @@
vshr.s16 q12, #1 ; right shift 1 after vqdmulh
vshr.s16 q13, #1
+ ldr r5, [r1, #vp8_blockd_eob]
+
orr r2, r2, r3 ; check if all zero (step 4)
cmp r2, #0 ; check if all zero (step 5)
beq zero_output ; check if all zero (step 6)
@@ -230,14 +231,13 @@
vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant
- add r4, r1, #vp8_blockd_eob
- vst1.32 {d0[0]}, [r4@32]
+ vst1.8 {d0[0]}, [r5] ; store eob
ldmfd sp!, {r4-r7}
bx lr
zero_output
- str r2, [r1, #vp8_blockd_eob]
+ strb r2, [r5] ; store eob
vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0
vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 52d84013e..5b3a0275f 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -46,7 +46,7 @@ void vp8_quantize_mb_neon(MACROBLOCK *x)
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
if (has_2nd_order)
- x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 74e40323d..1c07cbdd5 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -94,7 +94,8 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
- ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
+ ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
+ x->e_mbd.predictor, b->src_stride);
vp8_transform_intra_mby(x);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index b3c7df502..faa1a8e33 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -274,7 +274,7 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type,
qcoeff_ptr = d->qcoeff;
dqcoeff_ptr = d->dqcoeff;
i0 = !type;
- eob = d->eob;
+ eob = *d->eob;
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult;
@@ -466,8 +466,45 @@ static void optimize_b(MACROBLOCK *mb, int ib, int type,
}
final_eob++;
- d->eob = final_eob;
- *a = *l = (d->eob != !type);
+ *a = *l = (final_eob != !type);
+ *d->eob = (char)final_eob;
+}
+static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
+{
+ int sum=0;
+ int i;
+ BLOCKD *bd = &x->block[24];
+
+ if(bd->dequant[0]>=35 && bd->dequant[1]>=35)
+ return;
+
+ for(i=0;i<(*bd->eob);i++)
+ {
+ int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
+ sum+= (coef>=0)?coef:-coef;
+ if(sum>=35)
+ return;
+ }
+ /**************************************************************************
+ our inverse hadamard transform effectively is weighted sum of all 16 inputs
+ with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
+ dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
+ output after inverse wht and idct will be all zero. A sum of absolute value
+ smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
+ fall between -35 and +35.
+ **************************************************************************/
+ if(sum < 35)
+ {
+ for(i=0;i<(*bd->eob);i++)
+ {
+ int rc = vp8_default_zig_zag1d[i];
+ bd->qcoeff[rc]=0;
+ bd->dqcoeff[rc]=0;
+ }
+ *bd->eob = 0;
+ *a = *l = (*bd->eob != !type);
+ }
}
static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
@@ -475,6 +512,7 @@ static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
int b;
int type;
int has_2nd_order;
+
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
@@ -506,6 +544,8 @@ static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
b=24;
optimize_b(x, b, PLANE_TYPE_Y2,
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
}
@@ -539,7 +579,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
for (b = 0; b < 16; b++)
{
optimize_b(x, b, type,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
@@ -548,6 +588,8 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
b=24;
optimize_b(x, b, PLANE_TYPE_Y2,
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
}
@@ -608,7 +650,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd,
{
BLOCKD *b = &x->block[i];
- if (b->eob > 1)
+ if (*b->eob > 1)
{
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 16,
*(b->base_dst) + b->dst, b->dst_stride);
@@ -625,7 +667,7 @@ static void inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd,
{
BLOCKD *b = &x->block[i];
- if (b->eob > 1)
+ if (*b->eob > 1)
{
IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, 8,
*(b->base_dst) + b->dst, b->dst_stride);
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 503d24123..22cbbee85 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -62,7 +62,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
}
}
}
- d->eob = eob + 1;
+ *d->eob = (char)(eob + 1);
}
#else
@@ -97,7 +97,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
eob = i; // last nonzero coeffs
}
}
- d->eob = eob + 1;
+ *d->eob = (char)(eob + 1);
}
#endif
@@ -152,7 +152,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
}
}
- d->eob = eob + 1;
+ *d->eob = (char)(eob + 1);
}
/* Perform regular quantization, with unbiased rounding and no zero bin. */
@@ -210,7 +210,7 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
}
}
- d->eob = eob + 1;
+ *d->eob = (char)(eob + 1);
}
#else
@@ -264,7 +264,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
}
}
- d->eob = eob + 1;
+ *d->eob = (char)(eob + 1);
}
#endif
@@ -731,4 +731,3 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
vp8cx_init_quantizer(cpi);
}
-
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index fdb519c19..7950960de 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -485,7 +485,7 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{
int c = !type; /* start at coef 0, unless Y with Y2 */
- int eob = b->eob;
+ int eob = (int)(*b->eob);
int pt ; /* surrounding block/prev coef predictor */
int cost = 0;
short *qcoeff_ptr = b->qcoeff;
@@ -1299,11 +1299,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// store everything needed to come back to this!!
for (i = 0; i < 16; i++)
{
- BLOCKD *bd = &x->e_mbd.block[i];
-
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
bsi->modes[i] = x->partition_info->bmi[i].mode;
- bsi->eobs[i] = bd->eob;
+ bsi->eobs[i] = x->e_mbd.eobs[i];
}
}
}
@@ -1432,7 +1430,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
BLOCKD *bd = &x->e_mbd.block[i];
bd->bmi.mv.as_int = bsi.mvs[i].as_int;
- bd->eob = bsi.eobs[i];
+ *bd->eob = bsi.eobs[i];
}
*returntotrate = bsi.r;
@@ -2271,7 +2269,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
for (i = 0; i <= 24; i++)
{
- tteob += x->e_mbd.block[i].eob;
+ tteob += x->e_mbd.eobs[i];
}
if (tteob == 0)
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index c8db4f067..e81948567 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -108,15 +108,16 @@ static void tokenize2nd_order_b
ENTROPY_CONTEXT * a;
ENTROPY_CONTEXT * l;
int band, rc, v, token;
+ int eob;
b = x->block + 24;
qcoeff_ptr = b->qcoeff;
a = (ENTROPY_CONTEXT *)x->above_context + 8;
l = (ENTROPY_CONTEXT *)x->left_context + 8;
-
+ eob = x->eobs[24];
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- if(!b->eob)
+ if(!eob)
{
/* c = band for this case */
t->Token = DCT_EOB_TOKEN;
@@ -142,7 +143,7 @@ static void tokenize2nd_order_b
t++;
c = 1;
- for (; c < b->eob; c++)
+ for (; c < eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
@@ -213,7 +214,7 @@ static void tokenize1st_order_b
c = type ? 0 : 1;
- if(c >= b->eob)
+ if(c >= *b->eob)
{
/* c = band for this case */
t->Token = DCT_EOB_TOKEN;
@@ -240,7 +241,7 @@ static void tokenize1st_order_b
t++;
c++;
- for (; c < b->eob; c++)
+ for (; c < *b->eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
@@ -284,7 +285,7 @@ static void tokenize1st_order_b
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
- if(!b->eob)
+ if(!(*b->eob))
{
/* c = band for this case */
t->Token = DCT_EOB_TOKEN;
@@ -311,7 +312,7 @@ static void tokenize1st_order_b
t++;
c = 1;
- for (; c < b->eob; c++)
+ for (; c < *b->eob; c++)
{
rc = vp8_default_zig_zag1d[c];
band = vp8_coef_bands[c];
@@ -356,11 +357,11 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
if (has_y2_block)
{
for (i = 0; i < 16; i++)
- skip &= (x->block[i].eob < 2);
+ skip &= (x->eobs[i] < 2);
}
for (; i < 24 + has_y2_block; i++)
- skip &= (!x->block[i].eob);
+ skip &= (!x->eobs[i]);
return skip;
}
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index c483933df..7c249ff88 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -194,6 +194,8 @@ ZIGZAG_LOOP 15
movdqa [rdi], xmm0 ; store dqcoeff
movdqa [rdi + 16], xmm1
+ mov rcx, [rsi + vp8_blockd_eob]
+
; select the last value (in zig_zag order) for EOB
pcmpeqw xmm2, xmm6
pcmpeqw xmm3, xmm6
@@ -214,7 +216,8 @@ ZIGZAG_LOOP 15
pmaxsw xmm2, xmm3
movd eax, xmm2
and eax, 0xff
- mov [rsi + vp8_blockd_eob], eax
+
+ mov BYTE PTR [rcx], al ; store eob
; begin epilog
add rsp, stack_size
@@ -337,6 +340,8 @@ sym(vp8_fast_quantize_b_sse2):
pmaxsw xmm1, xmm5
+ mov rcx, [rsi + vp8_blockd_eob]
+
; now down to 8
pshufd xmm5, xmm1, 00001110b
@@ -354,7 +359,8 @@ sym(vp8_fast_quantize_b_sse2):
movd eax, xmm1
and eax, 0xff
- mov [rsi + vp8_blockd_eob], eax
+
+ mov BYTE PTR [rcx], al ; store eob
; begin epilog
%if ABI_IS_32BIT
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index 95e1c2074..70eac0c0f 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -208,6 +208,8 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
movdqa [rdi], xmm0
movdqa [rdi + 16], xmm1
+ mov rcx, [rsi + vp8_blockd_eob]
+
; select the last value (in zig_zag order) for EOB
pxor xmm6, xmm6
pcmpeqw xmm4, xmm6
@@ -225,7 +227,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
add eax, 1
and eax, edi
- mov [rsi + vp8_blockd_eob], eax
+ mov BYTE PTR [rcx], al ; store eob
; begin epilog
%if ABI_IS_32BIT
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index 912007e02..34cc9c3bb 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -110,12 +110,14 @@ sym(vp8_fast_quantize_b_ssse3):
movdqa [rcx], xmm2 ;store dqcoeff
movdqa [rcx + 16], xmm3 ;store dqcoeff
+ mov rcx, [rsi + vp8_blockd_eob]
+
sub edi, edx ;check for all zeros in bit mask
sar edi, 31 ;0 or -1
add eax, 1
and eax, edi ;if the bit mask was all zero,
;then eob = 0
- mov [rsi + vp8_blockd_eob], eax
+ mov BYTE PTR [rcx], al ;store eob
; begin epilog
%if ABI_IS_32BIT
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 191d61c60..7f5208461 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -37,17 +37,17 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
- d->eob = vp8_fast_quantize_b_impl_mmx(
- coeff_ptr,
- zbin_ptr,
- qcoeff_ptr,
- dequant_ptr,
- scan_mask,
-
- round_ptr,
- quant_ptr,
- dqcoeff_ptr
- );
+ *d->eob = (char)vp8_fast_quantize_b_impl_mmx(
+ coeff_ptr,
+ zbin_ptr,
+ qcoeff_ptr,
+ dequant_ptr,
+ scan_mask,
+
+ round_ptr,
+ quant_ptr,
+ dqcoeff_ptr
+ );
}
int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 1be7e337f..7260e942b 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -560,7 +560,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx)
priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
- if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
+ if (priv->cx_data_sz < 32768) priv->cx_data_sz = 32768;
priv->cx_data = malloc(priv->cx_data_sz);