From 5eefd3ebfdf61f76676de4f86e128e3d101311a2 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 28 Oct 2015 14:35:04 +0000 Subject: Add AVX vectorized vp9_diamond_search_sad This function now has an AVX intrinsics version which is about 80% faster compared to the C implementation. This provides a 2-4% total speed-up for encode, depending on encoding parameters. The function utilizes 3 properties of the cost function lookup table, constructed in 'cal_nmvjointsadcost' and 'cal_nmvsadcosts'. For the joint cost: - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] For the component costs: - For all i: mvsadcost[0][i] == mvsadcost[1][i] (equal per component cost) - For all i: mvsadcost[0][i] == mvsadcost[0][-i] (Cost function is even) These must hold, otherwise the AVX version of the function cannot be used. Change-Id: I6c2791d43022822a9e6ab43cd124a773946d0bdc --- vp9/encoder/vp9_encoder.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'vp9/encoder/vp9_encoder.c') diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index d86a7a7d3..eebd7c548 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1570,7 +1570,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { #endif #define log2f(x) (log (x) / (float) M_LOG2_E) +/*********************************************************************** + * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' * + *********************************************************************** + * The following 2 functions ('cal_nmvjointsadcost' and * + * 'cal_nmvsadcosts') are used to calculate cost lookup tables * + * used by 'vp9_diamond_search_sad'. The C implementation of the * + * function is generic, but the AVX intrinsics optimised version * + * relies on the following properties of the computed tables: * + * For cal_nmvjointsadcost: * + * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] * + * For cal_nmvsadcosts: * + * - For all i: mvsadcost[0][i] == mvsadcost[1][i] * + * (Equal costs for both components) * + * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] * + * (Cost function is even) * + * If these do not hold, then the AVX optimised version of the * + * 'vp9_diamond_search_sad' function cannot be used as it is, in which * + * case you can revert to using the C function instead. * + ***********************************************************************/ + static void cal_nmvjointsadcost(int *mvjointsadcost) { + /********************************************************************* + * Warning: Read the comments above before modifying this function * + *********************************************************************/ mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; @@ -1578,6 +1601,9 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { } static void cal_nmvsadcosts(int *mvsadcost[2]) { + /********************************************************************* + * Warning: Read the comments above before modifying this function * + *********************************************************************/ int i = 1; mvsadcost[0][0] = 0; @@ -1739,6 +1765,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->first_time_stamp_ever = INT64_MAX; + /********************************************************************* + * Warning: Read the comments around 'cal_nmvjointsadcost' and * + * 'cal_nmvsadcosts' before modifying how these tables are computed. * + *********************************************************************/ cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost); cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; -- cgit v1.2.3