From 9cfba09ac0e5182722bc562a5a071388611c2a04 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 19 Oct 2015 14:05:35 +0100 Subject: Optimize vpx_quantize_{b,b_32x32} assembler. Added optimization of the 8 bit assembly quantizer routines. This makes these functions up to 100% faster, depending on encoding parameters. This patch maskes the encoder faster in both the high bitdepth and 8bit configurations. In the high bitdepth configuration, it effects profile 0 only. Based on my profiling using 1080p input the net gain is between 1-3% for the 8 bit config, and around 2.5-4.5% for the high bitdepth config, depending on target bitrate. The difference between the 8 bit and high bitdepth configurations for the same encoder run is reduced by 1% in all cases I have profiled. Change-Id: I86714a6b7364da20cd468cd784247009663a5140 --- vp10/encoder/context_tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'vp10/encoder') diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c index 532e82caa..6c056d28e 100644 --- a/vp10/encoder/context_tree.c +++ b/vp10/encoder/context_tree.c @@ -30,13 +30,13 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, for (i = 0; i < MAX_MB_PLANE; ++i) { for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k], - vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k]))); + vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->eobs[i][k], - vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k]))); + vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k]))); ctx->coeff_pbuf[i][k] = ctx->coeff[i][k]; ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k]; ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k]; -- cgit v1.2.3