diff options
author | Deepa K G <deepa.kg@ittiam.com> | 2023-06-06 11:38:09 +0530 |
---|---|---|
committer | Deepa K G <deepa.kg@ittiam.com> | 2023-06-06 14:35:14 +0530 |
commit | e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb (patch) | |
tree | e1710e085ebe66afab5ed4c74e4dfb23f47c8716 | |
parent | 7b66c730a2edd3e232dce5e8ef2522ff83928a90 (diff) | |
download | libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar.gz libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.tar.bz2 libvpx-e510716d7e9a0a34592eb8ff1f8a65b951fe2eeb.zip |
Add comments in vp9_diamond_search_sad_avx()
Added comments related to re-arranging the
elements of the SAD vector to find the
minimum.
Change-Id: I58b702d304a6cdd32f04775fba603e39c19a8947
-rw-r--r-- | vp9/encoder/x86/vp9_diamond_search_sad_avx.c | 8 |
1 file changed, 8 insertions, 0 deletions
```diff
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index c00579edc..63c35df09 100644
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -233,12 +233,19 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
         if (UNLIKELY(local_best_sad == 0xffff)) {
           __m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d;
 
+          // Re-arrange the values in v_sad_d as follows:
+          // v_loval_d[0] = v_sad_d[0], v_loval_d[1] = v_sad_d[2]
+          // v_loval_d[2] = v_sad_d[1], v_loval_d[3] = v_sad_d[3]
+          // v_loidx_d stores the corresponding indices 0, 2, 1, 3
+          // This re-arrangement is required to ensure that when there exists
+          // more than one minimum, the one with the least index is selected
           v_loval_d = _mm_shuffle_epi32(v_sad_d, 0xd8);
           v_loidx_d = _mm_set_epi32(3, 1, 2, 0);
 
           v_hival_d = _mm_srli_si128(v_loval_d, 8);
           v_hiidx_d = _mm_srli_si128(v_loidx_d, 8);
 
+          // Compare if v_sad_d[1] < v_sad_d[0], v_sad_d[3] < v_sad_d[2]
           v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
 
           v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
@@ -246,6 +253,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
           v_hival_d = _mm_srli_si128(v_loval_d, 4);
           v_hiidx_d = _mm_srli_si128(v_loidx_d, 4);
 
+          // min(v_sad_d[2], v_sad_d[3]) < min(v_sad_d[0], v_sad_d[1])
           v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
 
           v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
```