summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author hkuang <hkuang@google.com> 2015-03-23 16:50:37 -0700
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2015-03-23 16:50:37 -0700
commit 9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84 (patch)
tree 9453290413e49a37148149cc8c84ceea3fd07df2
parent cd1d40ff5d58dd62ea540b357f74be7fd9850880 (diff)
parent 85107641a48447875da27b56417430f3ec30e8b0 (diff)
download libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar
libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar.gz
libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.tar.bz2
libvpx-9f4f98fdbd17c6e88bb9d3478b8e89a27a14ac84.zip
Merge "Optimize the intra frame decode to skip some unnecessary copy."
-rw-r--r-- vp9/common/vp9_reconintra.c 148
1 files changed, 99 insertions, 49 deletions
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 1668b99ce..f832a3b1c 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -30,6 +30,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
ADST_ADST, // TM
};
+enum {
+ NEED_LEFT = 1 << 1,
+ NEED_ABOVE = 1 << 2,
+ NEED_ABOVERIGHT = 1 << 3,
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {
+ NEED_ABOVE | NEED_LEFT, // DC
+ NEED_ABOVE, // V
+ NEED_LEFT, // H
+ NEED_ABOVERIGHT, // D45
+ NEED_LEFT | NEED_ABOVE, // D135
+ NEED_LEFT | NEED_ABOVE, // D117
+ NEED_LEFT | NEED_ABOVE, // D153
+ NEED_LEFT, // D207
+ NEED_ABOVERIGHT, // D63
+ NEED_LEFT | NEED_ABOVE, // TM
+};
+
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
@@ -790,75 +809,106 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
- vpx_memset(left_col, 129, 64);
-
- // left
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
+ // NEED_LEFT
+ if (extend_modes[mode] & NEED_LEFT) {
+ if (left_available) {
+ if (xd->mb_to_bottom_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (y0 + bs <= frame_height) {
+ for (i = 0; i < bs; ++i)
+ left_col[i] = ref[i * ref_stride - 1];
+ } else {
+ const int extend_bottom = frame_height - y0;
+ for (i = 0; i < extend_bottom; ++i)
+ left_col[i] = ref[i * ref_stride - 1];
+ for (; i < bs; ++i)
+ left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+ }
} else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
+ /* faster path if the block does not need extension */
+ for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
}
} else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
+ vpx_memset(left_col, 129, bs);
}
}
- // TODO(hkuang) do not extend 2*bs pixels for all modes.
- // above
- if (up_available) {
- const uint8_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- vpx_memcpy(above_row, above_ref, 2 * bs);
+ // NEED_ABOVE
+ if (extend_modes[mode] & NEED_ABOVE) {
+ if (up_available) {
+ const uint8_t *above_ref = ref - ref_stride;
+ if (xd->mb_to_right_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (x0 + bs <= frame_width) {
+ vpx_memcpy(above_row, above_ref, bs);
+ } else if (x0 <= frame_width) {
+ const int r = frame_width - x0;
+ vpx_memcpy(above_row, above_ref, r);
+ vpx_memset(above_row + r, above_row[r - 1],
+ x0 + bs - frame_width);
+ }
+ } else {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && right_available && left_available) {
+ const_above_row = above_ref;
} else {
vpx_memcpy(above_row, above_ref, bs);
- vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
+ }
+ above_row[-1] = left_available ? above_ref[-1] : 129;
+ } else {
+ vpx_memset(above_row, 127, bs);
+ above_row[-1] = 127;
+ }
+ }
+
+ // NEED_ABOVERIGHT
+ if (extend_modes[mode] & NEED_ABOVERIGHT) {
+ if (up_available) {
+ const uint8_t *above_ref = ref - ref_stride;
+ if (xd->mb_to_right_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (x0 + 2 * bs <= frame_width) {
+ if (right_available && bs == 4) {
+ vpx_memcpy(above_row, above_ref, 2 * bs);
+ } else {
+ vpx_memcpy(above_row, above_ref, bs);
+ vpx_memset(above_row + bs, above_row[bs - 1], bs);
+ }
+ } else if (x0 + bs <= frame_width) {
+ const int r = frame_width - x0;
+ if (right_available && bs == 4) {
+ vpx_memcpy(above_row, above_ref, r);
+ vpx_memset(above_row + r, above_row[r - 1],
+ x0 + 2 * bs - frame_width);
+ } else {
+ vpx_memcpy(above_row, above_ref, bs);
+ vpx_memset(above_row + bs, above_row[bs - 1], bs);
+ }
+ } else if (x0 <= frame_width) {
+ const int r = frame_width - x0;
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
+ }
+ } else {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && right_available && left_available) {
+ const_above_row = above_ref;
} else {
vpx_memcpy(above_row, above_ref, bs);
- vpx_memset(above_row + bs, above_row[bs - 1], bs);
+ if (bs == 4 && right_available)
+ vpx_memcpy(above_row + bs, above_ref + bs, bs);
+ else
+ vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- vpx_memcpy(above_row, above_ref, r);
- vpx_memset(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
}
above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- vpx_memcpy(above_row, above_ref, bs);
- if (bs == 4 && right_available)
- vpx_memcpy(above_row + bs, above_ref + bs, bs);
- else
- vpx_memset(above_row + bs, above_row[bs - 1], bs);
- above_row[-1] = left_available ? above_ref[-1] : 129;
- }
+ vpx_memset(above_row, 127, bs * 2);
+ above_row[-1] = 127;
}
- } else {
- vpx_memset(above_row, 127, bs * 2);
- above_row[-1] = 127;
}
// predict