summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG | 2
-rwxr-xr-x test/vpxenc.sh | 25
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 23
-rw-r--r-- vp9/encoder/vp9_encoder.c | 76
-rw-r--r-- vp9/encoder/vp9_encoder.h | 14
-rw-r--r-- vp9/encoder/vp9_firstpass.c | 10
-rw-r--r-- vp9/encoder/vp9_firstpass.h | 1
-rw-r--r-- vpx_dsp/mips/convolve8_avg_dspr2.c | 3
-rw-r--r-- vpx_dsp/mips/convolve8_avg_horiz_dspr2.c | 3
-rw-r--r-- vpx_dsp/mips/convolve8_dspr2.c | 4
-rw-r--r-- vpx_dsp/mips/convolve8_horiz_dspr2.c | 2
-rw-r--r-- vpx_dsp/mips/convolve8_vert_dspr2.c | 2
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c | 2
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_avg_msa.c | 8
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c | 2
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_horiz_msa.c | 2
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_msa.c | 8
-rw-r--r-- vpx_dsp/mips/vpx_convolve8_vert_msa.c | 2
-rw-r--r-- vpx_dsp/vpx_filter.h | 9
19 files changed, 131 insertions, 67 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 2281394c8..52089df06 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-2017-01-04 v1.7.0 "Mandarin Duck"
+2018-01-04 v1.7.0 "Mandarin Duck"
This release focused on high bit depth performance (10/12 bit) and vp9
encoding improvements.
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index e24c10672..f94e2e094 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -291,15 +291,14 @@ vpxenc_vp9_webm_rt_multithread_tiled() {
--threads=${threads} \
--tile-columns=${tile_cols} \
--output="${output}"
+
+ if [ ! -e "${output}" ]; then
+ elog "Output file does not exist."
+ return 1
+ fi
+ rm "${output}"
done
done
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
-
- rm "${output}"
fi
}
@@ -320,15 +319,13 @@ vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() {
--tile-columns=${tile_cols} \
--frame-parallel=1 \
--output="${output}"
+ if [ ! -e "${output}" ]; then
+ elog "Output file does not exist."
+ return 1
+ fi
+ rm "${output}"
done
done
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
-
- rm "${output}"
fi
}
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9864de075..bdd5b39c1 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3183,7 +3183,7 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
#define FEATURES 4
// ML-based partition search breakout.
-static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
+static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
const MACROBLOCK *const x,
const RD_COST *const rd_cost) {
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
@@ -3214,14 +3214,29 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
if (!linear_weights) return 0;
{ // Generate feature values.
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int ac_q =
+ vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
+#else
const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
const int num_pels_log2 = num_pels_log2_lookup[bsize];
int feature_index = 0;
unsigned int var, sse;
float rate_f, dist_f;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ var =
+ vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
+ } else {
+ var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, &sse);
+ }
+#else
var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
+#endif
var = var >> num_pels_log2;
vpx_clear_system_state();
@@ -3827,13 +3842,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
- int use_ml_based_breakout =
+ const int use_ml_based_breakout =
cpi->sf.use_ml_partition_search_breakout &&
cm->base_qindex >= 100;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- use_ml_based_breakout = 0;
-#endif // CONFIG_VP9_HIGHBITDEPTH
if (use_ml_based_breakout) {
if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
do_split = 0;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7ebd2a872..d9299f39e 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2359,10 +2359,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_set_speed_features_framesize_dependent(cpi);
if (cpi->sf.enable_tpl_model) {
- for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
- int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
-
+ const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ // TODO(jingning): Reduce the actual memory use for tpl model build up.
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
vpx_calloc(mi_rows * mi_cols,
sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
@@ -2373,6 +2373,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
}
+
+ for (frame = 0; frame < REF_FRAMES; ++frame) {
+ cpi->enc_frame_buf[frame].mem_valid = 0;
+ cpi->enc_frame_buf[frame].released = 1;
+ }
}
// Allocate memory to store variances for a frame.
@@ -2572,7 +2577,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vp9_denoiser_free(&(cpi->denoiser));
#endif
- for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
cpi->tpl_stats[frame].is_valid = 0;
}
@@ -5361,12 +5366,14 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
int pframe_qindex = cpi->tpl_stats[2].base_qindex;
RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
- int recon_frame_index[REFS_PER_FRAME + 1] = { -1, -1, -1, -1 };
+ int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
+
+ memset(recon_frame_index, -1, sizeof(recon_frame_index));
// TODO(jingning): To be used later for gf frame type parsing.
(void)gf_group;
- for (i = 0; i < FRAME_BUFFERS && frame_idx < REFS_PER_FRAME + 1; ++i) {
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
if (frame_bufs[i].ref_count == 0) {
alloc_frame_mvs(cm, i);
if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
@@ -5381,6 +5388,8 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
recon_frame_index[frame_idx] = i;
++frame_idx;
+
+ if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
}
}
@@ -5407,8 +5416,9 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
// Initialize P frames
for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
+ const int frame_gop_offset = gf_group->frame_gop_index[frame_idx];
struct lookahead_entry *buf =
- vp9_lookahead_peek(cpi->lookahead, frame_idx - 2);
+ vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
if (buf == NULL) break;
@@ -5452,7 +5462,7 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
void init_tpl_stats(VP9_COMP *cpi) {
int frame_idx;
- for (frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
+ for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
memset(tpl_frame->tpl_stats_ptr, 0,
tpl_frame->height * tpl_frame->width *
@@ -5463,12 +5473,14 @@ void init_tpl_stats(VP9_COMP *cpi) {
#if CONFIG_NON_GREEDY_MV
static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int rf_idx, int_mv *nb_full_mvs) {
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs) {
+ const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
int i;
for (i = 0; i < NB_MVS_NUM; ++i) {
- int r = dirs[i][0];
- int c = dirs[i][1];
+ int r = dirs[i][0] * mi_unit;
+ int c = dirs[i][1] * mi_unit;
if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
mi_col + c < tpl_frame->mi_cols) {
const TplDepStats *tpl_ptr =
@@ -5539,7 +5551,7 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
#if CONFIG_NON_GREEDY_MV
(void)search_method;
(void)sadpb;
- prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
+ prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize,
nb_full_mvs);
vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
@@ -5837,21 +5849,12 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
int_mv mv;
if (ref_frame[rf_idx] == NULL) {
-#if CONFIG_NON_GREEDY_MV
- tpl_stats->ready[rf_idx] = 0;
-#endif
continue;
} else {
-#if CONFIG_NON_GREEDY_MV
- tpl_stats->ready[rf_idx] = 1;
-#endif
}
#if CONFIG_NON_GREEDY_MV
- motion_compensated_prediction(
- cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
- mi_row, mi_col, tpl_stats, rf_idx);
+ (void)td;
mv.as_int = tpl_stats->mv_arr[rf_idx].as_int;
#else
motion_compensated_prediction(
@@ -6018,6 +6021,31 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
tpl_frame->mv_dist_sum[rf_idx] = 0;
tpl_frame->mv_cost_sum[rf_idx] = 0;
}
+
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+
+ set_mv_limits(cm, x, mi_row, mi_col);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ if (ref_frame[rf_idx] == NULL) {
+ tpl_stats->ready[rf_idx] = 0;
+ continue;
+ } else {
+ tpl_stats->ready[rf_idx] = 1;
+ }
+ motion_compensated_prediction(
+ cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride,
+ bsize, mi_row, mi_col, tpl_stats, rf_idx);
+ }
+ }
+ }
+
#endif
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
@@ -6116,7 +6144,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
#endif // CONFIG_NON_GREEDY_MV
static void setup_tpl_stats(VP9_COMP *cpi) {
- GF_PICTURE gf_picture[MAX_LAG_BUFFERS];
+ GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
int tpl_group_frames = 0;
int frame_idx;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 9cec511f7..79346ed09 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -495,6 +495,15 @@ typedef struct ARNRFilterData {
struct scale_factors sf;
} ARNRFilterData;
+typedef struct EncFrameBuf {
+ int mem_valid;
+ int released;
+ YV12_BUFFER_CONFIG frame;
+} EncFrameBuf;
+
+// Maximum operating frame buffer size needed for a GOP using ARF reference.
+#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
@@ -518,8 +527,9 @@ typedef struct VP9_COMP {
#endif
YV12_BUFFER_CONFIG *raw_source_frame;
- TplDepFrame tpl_stats[MAX_LAG_BUFFERS];
- YV12_BUFFER_CONFIG *tpl_recon_frames[REFS_PER_FRAME + 1];
+ TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
+ YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
+ EncFrameBuf enc_frame_buf[REF_FRAMES];
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 58c3a435d..318dd21b7 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2124,6 +2124,7 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
for (idx = start; idx < end; ++idx) {
gf_group->update_type[*index_counter] = LF_UPDATE;
gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = idx;
gf_group->rf_level[*index_counter] = INTER_NORMAL;
gf_group->layer_depth[*index_counter] = depth;
++(*index_counter);
@@ -2137,6 +2138,7 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
gf_group->layer_depth[*index_counter] = depth;
gf_group->update_type[*index_counter] = ARF_UPDATE;
gf_group->arf_src_offset[*index_counter] = mid - start;
+ gf_group->frame_gop_index[*index_counter] = mid;
gf_group->rf_level[*index_counter] = GF_ARF_LOW;
for (idx = 0; idx <= mid; ++idx)
@@ -2153,6 +2155,7 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
gf_group->update_type[*index_counter] = USE_BUF_FRAME;
gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = mid;
gf_group->rf_level[*index_counter] = INTER_NORMAL;
gf_group->layer_depth[*index_counter] = depth;
++(*index_counter);
@@ -2203,17 +2206,18 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
gf_group->layer_depth[frame_index] = 1;
gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
++frame_index;
}
if (rc->source_alt_ref_pending && cpi->multi_layer_arf) {
- find_arf_order(cpi, gf_group, &frame_index, 2, 0,
- rc->baseline_gf_interval - 1);
+ find_arf_order(cpi, gf_group, &frame_index, 2, 1, rc->baseline_gf_interval);
set_gf_overlay_frame_type(gf_group, frame_index,
rc->source_alt_ref_pending);
gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
return frame_index;
}
@@ -2227,6 +2231,7 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
gf_group->update_type[frame_index] = LF_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->frame_gop_index[frame_index] = i + 1;
gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
++frame_index;
@@ -2247,6 +2252,7 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
gf_group->rf_level[frame_index] = GF_ARF_STD;
}
gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
return frame_index;
}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 9bd0a9e04..9d1e9355a 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -129,6 +129,7 @@ typedef struct {
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2];
int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2];
diff --git a/vpx_dsp/mips/convolve8_avg_dspr2.c b/vpx_dsp/mips/convolve8_avg_dspr2.c
index d9c2bef69..cc458c861 100644
--- a/vpx_dsp/mips/convolve8_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_dspr2.c
@@ -15,6 +15,7 @@
#include "vpx_dsp/mips/convolve_common_dspr2.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
#if HAVE_DSPR2
@@ -341,7 +342,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter,
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
index fb68ad881..7a9aa49d8 100644
--- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
@@ -15,6 +15,7 @@
#include "vpx_dsp/mips/convolve_common_dspr2.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
#if HAVE_DSPR2
@@ -945,7 +946,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter,
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_dspr2.c b/vpx_dsp/mips/convolve8_dspr2.c
index 89f0f4196..1e7052f6c 100644
--- a/vpx_dsp/mips/convolve8_dspr2.c
+++ b/vpx_dsp/mips/convolve8_dspr2.c
@@ -1322,7 +1322,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
if (filter_x[3] == 0x80) {
copy_horiz_transposed(src - src_stride * 3, src_stride, temp,
intermediate_height, w, intermediate_height);
- } else if (((const int32_t *)filter_x)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp,
intermediate_height, filter_x, w, intermediate_height);
} else {
@@ -1365,7 +1365,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
/* copy the src to dst */
if (filter_y[3] == 0x80) {
copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h, w);
- } else if (((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride,
filter_y, h, w);
} else {
diff --git a/vpx_dsp/mips/convolve8_horiz_dspr2.c b/vpx_dsp/mips/convolve8_horiz_dspr2.c
index 77e95c844..09d6f36e5 100644
--- a/vpx_dsp/mips/convolve8_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c
@@ -825,7 +825,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/convolve8_vert_dspr2.c b/vpx_dsp/mips/convolve8_vert_dspr2.c
index c329f71cc..fd977b533 100644
--- a/vpx_dsp/mips/convolve8_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve8_vert_dspr2.c
@@ -325,7 +325,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
index 187a01342..5b5a1cbc3 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@@ -658,7 +658,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_hor[cnt] = filter_x[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
switch (w) {
case 4:
common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
index 5187cea21..ba816192a 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@@ -538,8 +538,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0 &&
- ((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2 &&
+ vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
@@ -571,8 +571,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
- } else if (((const int32_t *)filter_x)[0] == 0 ||
- ((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2 ||
+ vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
index ef8c90114..e6a790dfc 100644
--- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@@ -625,7 +625,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
diff --git a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
index 152dc2610..792c0f709 100644
--- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@@ -634,7 +634,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_hor[cnt] = filter_x[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2) {
switch (w) {
case 4:
common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
diff --git a/vpx_dsp/mips/vpx_convolve8_msa.c b/vpx_dsp/mips/vpx_convolve8_msa.c
index d35a5a7a6..c94216758 100644
--- a/vpx_dsp/mips/vpx_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@@ -558,8 +558,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_x)[0] == 0 &&
- ((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_x) == 2 &&
+ vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst,
@@ -591,8 +591,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
- } else if (((const int32_t *)filter_x)[0] == 0 ||
- ((const int32_t *)filter_y)[0] == 0) {
+ } else if (vpx_get_filter_taps(filter_x) == 2 ||
+ vpx_get_filter_taps(filter_y) == 2) {
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
y0_q4, y_step_q4, w, h);
} else {
diff --git a/vpx_dsp/mips/vpx_convolve8_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
index 13fce0077..195228689 100644
--- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@@ -641,7 +641,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
filt_ver[cnt] = filter_y[cnt];
}
- if (((const int32_t *)filter_y)[0] == 0) {
+ if (vpx_get_filter_taps(filter_y) == 2) {
switch (w) {
case 4:
common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h
index 05eb57265..54357ee6c 100644
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -11,6 +11,7 @@
#ifndef VPX_VPX_DSP_VPX_FILTER_H_
#define VPX_VPX_DSP_VPX_FILTER_H_
+#include <assert.h>
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
@@ -26,6 +27,14 @@ extern "C" {
typedef int16_t InterpKernel[SUBPEL_TAPS];
+static INLINE int vpx_get_filter_taps(const int16_t *const filter) {
+ assert(filter[3] != 128);
+ if (!filter[0] && !filter[1] && !filter[2])
+ return 2;
+ else
+ return 8;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif