From 9cfba09ac0e5182722bc562a5a071388611c2a04 Mon Sep 17 00:00:00 2001
From: Geza Lore <gezalore@gmail.com>
Date: Mon, 19 Oct 2015 14:05:35 +0100
Subject: Optimize vpx_quantize_{b,b_32x32} assembler.

Added optimization of the 8 bit assembly quantizer routines. This makes
these functions up to 100% faster, depending on encoding parameters.

This patch maskes the encoder faster in both the high bitdepth and 8bit
configurations. In the high bitdepth configuration, it effects profile 0
only.

Based on my profiling using 1080p input the net gain is between 1-3% for
the 8 bit config, and around 2.5-4.5% for the high bitdepth config,
depending on target bitrate. The difference between the 8 bit and high
bitdepth configurations for the same encoder run is reduced by 1% in all
cases I have profiled.

Change-Id: I86714a6b7364da20cd468cd784247009663a5140
---
 vp10/encoder/context_tree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'vp10/encoder')

diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 532e82caa..6c056d28e 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -30,13 +30,13 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     for (k = 0; k < 3; ++k) {
       CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
+                      vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
       CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
+                      vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
       ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
       ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
       ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
-- 
cgit v1.2.3