summary refs log tree commit diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm 6
-rw-r--r-- vp9/encoder/vp9_firstpass.c 9
-rw-r--r-- vp9/encoder/vp9_mcomp.c 70
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 25
-rw-r--r-- vp9/encoder/vp9_rdopt.c 2
-rw-r--r-- vp9/encoder/x86/vp9_dct_sse2.c 19
-rw-r--r-- vp9/vp9_cx_iface.c 4
7 files changed, 54 insertions, 81 deletions
diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
index 5c097ccc8..3a0ff608b 100644
--- a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
@@ -259,7 +259,7 @@ idct32_transpose_pair_loop
; transpose pair loop processing
add r3, r3, #1
cmp r3, #1
- BLE idct32_transpose_pair_loop
+ ble idct32_transpose_pair_loop
; restore r0/input to its original value
sub r0, r0, #32*8*2
@@ -954,7 +954,7 @@ idct32_transpose_pair_loop
; bands loop processing
add r4, r4, #1
cmp r4, #3
- BLE idct32_bands_loop
+ ble idct32_bands_loop
pop {r4}
bx lr
@@ -1005,7 +1005,7 @@ idct32_combine_add_loop
; loop processing
add r3, r3, #1
cmp r3, #31
- BLE idct32_combine_add_loop
+ ble idct32_combine_add_loop
bx lr
ENDP ; |idct32_transpose|
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index ad7e6d821..6e44e604c 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2093,14 +2093,19 @@ void vp9_second_pass(VP9_COMP *cpi) {
cpi->twopass.est_max_qcorrection_factor = 1.0;
// Set a cq_level in constrained quality mode.
+ // Commenting this code out for now since it does not seem to be
+ // working well.
+ /*
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats,
- section_target_bandwidth);
+ section_target_bandwidth);
- cpi->cq_target_quality = cpi->oxcf.cq_level;
if (est_cq > cpi->cq_target_quality)
cpi->cq_target_quality = est_cq;
+ else
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
}
+ */
// guess at maxq needed in 2nd pass
cpi->twopass.maxq_max_limit = cpi->worst_quality;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ad8c8999a..7dd786904 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -136,66 +136,26 @@ void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
}
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
- int len;
- int search_site_count = 0;
+ int len, ss_count = 1;
- // Generate offsets for 8 search sites per step.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = 0;
- search_site_count++;
+ x->ss[0].mv.col = x->ss[0].mv.row = 0;
+ x->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride - len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride + len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride - len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride + len;
- search_site_count++;
+ // Generate offsets for 8 search sites per step.
+ const MV ss_mvs[8] = {
+ {-len, 0 }, {len, 0 }, { 0, -len}, {0, len},
+ {-len, -len}, {-len, len}, {len, -len}, {len, len}
+ };
+ int i;
+ for (i = 0; i < 8; ++i) {
+ search_site *const ss = &x->ss[ss_count++];
+ ss->mv = ss_mvs[i];
+ ss->offset = ss->mv.row * stride + ss->mv.col;
+ }
}
- x->ss_count = search_site_count;
+ x->ss_count = ss_count;
x->searches_per_step = 8;
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index bc1b4a8c6..1d0733ed6 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -122,6 +122,8 @@ static int kf_high_motion_minq[QINDEX_RANGE];
static int gf_low_motion_minq[QINDEX_RANGE];
static int gf_high_motion_minq[QINDEX_RANGE];
static int inter_minq[QINDEX_RANGE];
+static int afq_low_motion_minq[QINDEX_RANGE];
+static int afq_high_motion_minq[QINDEX_RANGE];
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
switch (mode) {
@@ -205,7 +207,16 @@ static void init_minq_luts(void) {
-0.00113,
0.697,
0.0);
-
+ afq_low_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000015,
+ -0.0009,
+ 0.33,
+ 0.0);
+ afq_high_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000021,
+ -0.00125,
+ 0.57,
+ 0.0);
}
}
@@ -2765,16 +2776,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
} else {
if (cpi->frames_since_key > 1) {
if (cpi->gfu_boost > high) {
- cpi->active_best_quality = cpi->cq_target_quality * 6 / 16;
+ cpi->active_best_quality = afq_low_motion_minq[q];
} else if (cpi->gfu_boost < low) {
- cpi->active_best_quality = cpi->cq_target_quality * 11 / 16;
+ cpi->active_best_quality = afq_high_motion_minq[q];
} else {
const int gap = high - low;
const int offset = high - cpi->gfu_boost;
- const int qdiff = cpi->cq_target_quality * 5 / 16;
+ const int qdiff = afq_high_motion_minq[q] - afq_low_motion_minq[q];
const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
- cpi->active_best_quality = cpi->cq_target_quality * 6 / 16
- + adjustment;
+
+ cpi->active_best_quality = afq_low_motion_minq[q] + adjustment;
}
}
}
@@ -3262,7 +3273,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// in this frame.
// update_base_skip_probs(cpi);
-#if CONFIG_INTERNAL_STATS
+#if 0 // CONFIG_INTERNAL_STATS
{
FILE *f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
int recon_err;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index adaa1a29d..041316b4f 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2318,7 +2318,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
// The current implementation doesn't support scaling.
- if (!vp9_is_scaled(&scale[frame_type]))
+ if (!vp9_is_scaled(&scale[frame_type]) && block_size >= BLOCK_8X8)
mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
frame_type, block_size);
}
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index eb271fef3..ad3d01da9 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -171,22 +171,21 @@ static INLINE void transpose_4x4(__m128i *res) {
void fdct4_1d_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[4], v[4];
- u[0] = _mm_add_epi16(in[0], in[3]);
- u[1] = _mm_add_epi16(in[1], in[2]);
- u[2] = _mm_sub_epi16(in[1], in[2]);
- u[3] = _mm_sub_epi16(in[0], in[3]);
+ u[0]=_mm_unpacklo_epi16(in[0], in[1]);
+ u[1]=_mm_unpacklo_epi16(in[3], in[2]);
+
+ v[0] = _mm_add_epi16(u[0], u[1]);
+ v[1] = _mm_sub_epi16(u[0], u[1]);
- v[0] = _mm_unpacklo_epi16(u[0], u[1]);
- v[1] = _mm_unpacklo_epi16(u[2], u[3]);
u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0
u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2
- u[2] = _mm_madd_epi16(v[1], k__cospi_p24_p08); // 1
- u[3] = _mm_madd_epi16(v[1], k__cospi_m08_p24); // 3
+ u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1
+ u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3
v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 4f63c52fd..08a1a8458 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -277,11 +277,9 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf,
// CBR code has been deprecated for experimental phase.
// CQ mode not yet tested
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
- /*
if (cfg.rc_end_usage == VPX_CQ)
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
- */
- if (cfg.rc_end_usage == VPX_Q)
+ else if (cfg.rc_end_usage == VPX_Q)
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
oxcf->target_bandwidth = cfg.rc_target_bitrate;