summary | refs | log | tree | commit | diff
path: root/vp9/encoder/vp9_encoder.c
diff options
context:
space:
mode:
author: Geza Lore <gezalore@gmail.com> 2015-10-28 14:35:04 +0000
committer: Geza Lore <gezalore@gmail.com> 2015-11-11 14:03:47 +0000
commit: 5eefd3ebfdf61f76676de4f86e128e3d101311a2 (patch)
tree: a763404e3e9890907b57fc522408fa2d63fd9ce1 /vp9/encoder/vp9_encoder.c
parent: 420e8d6d039c2224e00c13aba7f8908b68868359 (diff)
download: libvpx-5eefd3ebfdf61f76676de4f86e128e3d101311a2.tar
libvpx-5eefd3ebfdf61f76676de4f86e128e3d101311a2.tar.gz
libvpx-5eefd3ebfdf61f76676de4f86e128e3d101311a2.tar.bz2
libvpx-5eefd3ebfdf61f76676de4f86e128e3d101311a2.zip
Add AVX vectorized vp9_diamond_search_sad
This function now has an AVX intrinsics version which is about 80% faster than the C implementation. This provides a 2-4% total speed-up for encode, depending on encoding parameters.

The function utilizes 3 properties of the cost function lookup tables, constructed in 'cal_nmvjointsadcost' and 'cal_nmvsadcosts'.

For the joint cost:
- mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]

For the component costs:
- For all i: mvsadcost[0][i] == mvsadcost[1][i] (equal per component cost)
- For all i: mvsadcost[0][i] == mvsadcost[0][-i] (cost function is even)

These must hold, otherwise the AVX version of the function cannot be used.

Change-Id: I6c2791d43022822a9e6ab43cd124a773946d0bdc
Diffstat (limited to 'vp9/encoder/vp9_encoder.c')
-rw-r--r-- vp9/encoder/vp9_encoder.c 30
1 files changed, 30 insertions, 0 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index d86a7a7d3..eebd7c548 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1570,7 +1570,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
+/***********************************************************************
+ * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' *
+ ***********************************************************************
+ * The following 2 functions ('cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts') are used to calculate cost lookup tables *
+ * used by 'vp9_diamond_search_sad'. The C implementation of the *
+ * function is generic, but the AVX intrinsics optimised version *
+ * relies on the following properties of the computed tables: *
+ * For cal_nmvjointsadcost: *
+ * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
+ * For cal_nmvsadcosts: *
+ * - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
+ * (Equal costs for both components) *
+ * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
+ * (Cost function is even) *
+ * If these do not hold, then the AVX optimised version of the *
+ * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
+ * case you can revert to using the C function instead. *
+ ***********************************************************************/
+
static void cal_nmvjointsadcost(int *mvjointsadcost) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
mvjointsadcost[2] = 300;
@@ -1578,6 +1601,9 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) {
}
static void cal_nmvsadcosts(int *mvsadcost[2]) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
int i = 1;
mvsadcost[0][0] = 0;
@@ -1739,6 +1765,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->first_time_stamp_ever = INT64_MAX;
+ /*********************************************************************
+ * Warning: Read the comments around 'cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts' before modifying how these tables are computed. *
+ *********************************************************************/
cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];