diff options
author | hkuang <hkuang@google.com> | 2015-03-23 16:50:37 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2015-03-23 16:50:37 -0700 |
commit | 9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84 (patch) | |
tree | 9453290413e49a37148149cc8c84ceea3fd07df2 | |
parent | cd1d40ff5d58dd62ea540b357f74be7fd9850880 (diff) | |
parent | 85107641a48447875da27b56417430f3ec30e8b0 (diff) | |
download | libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar.gz libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar.bz2 libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.zip |
Merge "Optimize the intra frame decode to skip some unnecessary copy."
-rw-r--r-- | vp9/common/vp9_reconintra.c | 148 |
1 files changed, 99 insertions, 49 deletions
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 1668b99ce..f832a3b1c 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -30,6 +30,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { ADST_ADST, // TM }; +enum { + NEED_LEFT = 1 << 1, + NEED_ABOVE = 1 << 2, + NEED_ABOVERIGHT = 1 << 3, +}; + +static const uint8_t extend_modes[INTRA_MODES] = { + NEED_ABOVE | NEED_LEFT, // DC + NEED_ABOVE, // V + NEED_LEFT, // H + NEED_ABOVERIGHT, // D45 + NEED_LEFT | NEED_ABOVE, // D135 + NEED_LEFT | NEED_ABOVE, // D117 + NEED_LEFT | NEED_ABOVE, // D153 + NEED_LEFT, // D207 + NEED_ABOVERIGHT, // D63 + NEED_LEFT | NEED_ABOVE, // TM +}; + // This serves as a wrapper function, so that all the prediction functions // can be unified and accessed as a pointer array. Note that the boundary // above and left are not necessarily used all the time. @@ -790,75 +809,106 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - vpx_memset(left_col, 129, 64); - - // left - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + // NEED_LEFT + if (extend_modes[mode] & NEED_LEFT) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + vpx_memset(left_col, 129, bs); } } - // TODO(hkuang) do not extend 2*bs pixels for all modes. - // above - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - vpx_memcpy(above_row, above_ref, 2 * bs); + // NEED_ABOVE + if (extend_modes[mode] & NEED_ABOVE) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + vpx_memcpy(above_row, above_ref, bs); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + vpx_memcpy(above_row, above_ref, r); + vpx_memset(above_row + r, above_row[r - 1], + x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { vpx_memcpy(above_row, above_ref, bs); - vpx_memset(above_row + bs, above_row[bs - 1], bs); } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { + } + above_row[-1] = left_available ? above_ref[-1] : 129; + } else { + vpx_memset(above_row, 127, bs); + above_row[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if (extend_modes[mode] & NEED_ABOVERIGHT) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + vpx_memcpy(above_row, above_ref, 2 * bs); + } else { + vpx_memcpy(above_row, above_ref, bs); + vpx_memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + vpx_memcpy(above_row, above_ref, r); + vpx_memset(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + vpx_memcpy(above_row, above_ref, bs); + vpx_memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; vpx_memcpy(above_row, above_ref, r); vpx_memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { vpx_memcpy(above_row, above_ref, bs); - vpx_memset(above_row + bs, above_row[bs - 1], bs); + if (bs == 4 && right_available) + vpx_memcpy(above_row + bs, above_ref + bs, bs); + else + vpx_memset(above_row + bs, above_row[bs - 1], bs); } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - vpx_memcpy(above_row, above_ref, r); - vpx_memset(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); } above_row[-1] = left_available ? above_ref[-1] : 129; } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - vpx_memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - vpx_memcpy(above_row + bs, above_ref + bs, bs); - else - vpx_memset(above_row + bs, above_row[bs - 1], bs); - above_row[-1] = left_available ? above_ref[-1] : 129; - } + vpx_memset(above_row, 127, bs * 2); + above_row[-1] = 127; } - } else { - vpx_memset(above_row, 127, bs * 2); - above_row[-1] = 127; } // predict |