summaryrefslogtreecommitdiff
path: root/vp9/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder')
-rw-r--r--vp9/encoder/vp9_encodemb.c8
-rw-r--r--vp9/encoder/vp9_quantize.c43
-rw-r--r--vp9/encoder/vp9_speed_features.c2
-rw-r--r--vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm31
4 files changed, 65 insertions, 19 deletions
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index eb9624dde..cd0191e0a 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -320,10 +320,10 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
switch (tx_size) {
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan_order->scan,
- scan_order->iscan);
+ vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan_order->scan,
+ scan_order->iscan);
break;
case TX_16X16:
vp9_fdct16x16(src_diff, coeff, diff_stride);
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 4964e0fd0..370e1ce77 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -104,6 +104,49 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
*eob_ptr = eob + 1;
}
+// TODO(jingning) Refactor this file and combine functions with similar
+// operations.
+void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, eob = -1;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)zbin_oq_value;
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+
+ if (!skip_block) {
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp = 0;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ }
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 98d6825e7..00f4d7da6 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -253,6 +253,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 5) {
+ sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->max_partition_size = BLOCK_32X32;
@@ -287,7 +288,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mv.reduce_first_step_size = 1;
}
if (speed >= 7) {
- sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
sf->mv.fullpel_search_step_param = 10;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 2d9f2b056..508e1d4f5 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -234,21 +234,18 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
movifnidn quantq, quantmp
mova m1, [roundq] ; m1 = round
mova m2, [quantq] ; m2 = quant
-%ifidn %1, b_32x32
-; TODO(jingning) to be continued with 32x32 quantization process
+%ifidn %1, fp_32x32
pcmpeqw m5, m5
psrlw m5, 15
- paddw m0, m5
paddw m1, m5
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
psrlw m1, 1 ; m1 = (m1 + 1) / 2
%endif
mova m3, [r2q] ; m3 = dequant
mov r3, qcoeffmp
mov r4, dqcoeffmp
mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1
+%ifidn %1, fp_32x32
+ psllw m2, 1
%endif
pxor m5, m5 ; m5 = dedicated zero
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
@@ -275,18 +272,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
psignw m13, m10 ; m13 = reinsert sign
mova [qcoeffq+ncoeffq*2+ 0], m8
mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
pabsw m8, m8
pabsw m13, m13
%endif
pmullw m8, m3 ; dqc[i] = qc[i] * q
punpckhqdq m3, m3
pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
psrlw m8, 1
psrlw m13, 1
psignw m8, m9
psignw m13, m10
+ psrlw m0, m3, 2
%endif
mova [dqcoeffq+ncoeffq*2+ 0], m8
mova [dqcoeffq+ncoeffq*2+16], m13
@@ -307,13 +305,17 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
- pcmpeqw m7, m7
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
+ pcmpgtw m7, m6, m0
+ pcmpgtw m12, m11, m0
pmovmskb r6, m7
- pmovmskb r2, m7
+ pmovmskb r2, m12
+
or r6, r2
jz .skip_iter
%endif
+ pcmpeqw m7, m7
+
paddsw m6, m1 ; m6 += round
paddsw m11, m1 ; m11 += round
pmulhw m14, m6, m2 ; m14 = m6*q>>16
@@ -322,13 +324,13 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
psignw m13, m10 ; m13 = reinsert sign
mova [qcoeffq+ncoeffq*2+ 0], m14
mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
pabsw m14, m14
pabsw m13, m13
%endif
pmullw m14, m3 ; dqc[i] = qc[i] * q
pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
psrlw m14, 1
psrlw m13, 1
psignw m14, m9
@@ -349,7 +351,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
add ncoeffq, mmsize
jl .ac_only_loop
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
jmp .accumulate_eob
.skip_iter:
mova [qcoeffq+ncoeffq*2+ 0], m5
@@ -397,3 +399,4 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
INIT_XMM ssse3
QUANTIZE_FP fp, 7
+QUANTIZE_FP fp_32x32, 7