summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Deepa K G <deepa.kg@ittiam.com> 2023-06-06 11:38:09 +0530
committer Deepa K G <deepa.kg@ittiam.com> 2023-06-06 14:35:14 +0530
commit e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb (patch)
tree e1710e085ebe66afab5ed4c74e4dfb23f47c8716
parent 7b66c730a2edd3e232dce5e8ef2522ff83928a90 (diff)
download libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar
libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar.gz
libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar.bz2
libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.zip
Add comments in vp9_diamond_search_sad_avx()
Added comments related to re-arranging the elements of the SAD vector to find the minimum.

Change-Id: I58b702d304a6cdd32f04775fba603e39c19a8947
-rw-r--r-- vp9/encoder/x86/vp9_diamond_search_sad_avx.c 8
1 file changed, 8 insertions, 0 deletions
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index c00579edc..63c35df09 100644
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -233,12 +233,19 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
if (UNLIKELY(local_best_sad == 0xffff)) {
__m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d;
+ // Re-arrange the values in v_sad_d as follows:
+ // v_loval_d[0] = v_sad_d[0], v_loval_d[1] = v_sad_d[2]
+ // v_loval_d[2] = v_sad_d[1], v_loval_d[3] = v_sad_d[3]
+ // v_loidx_d stores the corresponding indices 0, 2, 1, 3
+ // This re-arrangement is required to ensure that when there exists
+ // more than one minimum, the one with the least index is selected
v_loval_d = _mm_shuffle_epi32(v_sad_d, 0xd8);
v_loidx_d = _mm_set_epi32(3, 1, 2, 0);
v_hival_d = _mm_srli_si128(v_loval_d, 8);
v_hiidx_d = _mm_srli_si128(v_loidx_d, 8);
+ // Compare if v_sad_d[1] < v_sad_d[0], v_sad_d[3] < v_sad_d[2]
v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
@@ -246,6 +253,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
v_hival_d = _mm_srli_si128(v_loval_d, 4);
v_hiidx_d = _mm_srli_si128(v_loidx_d, 4);
+ // min(v_sad_d[2], v_sad_d[3]) < min(v_sad_d[0], v_sad_d[1])
v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);