summaryrefslogtreecommitdiff
path: root/vp8
diff options
context:
space:
mode:
Diffstat (limited to 'vp8')
-rw-r--r--vp8/common/alloccommon.c7
-rw-r--r--vp8/common/asm_com_offsets.c16
-rw-r--r--vp8/common/blockd.h6
-rw-r--r--vp8/common/loopfilter_filters.c18
-rw-r--r--vp8/common/mfqe.c271
-rw-r--r--vp8/common/onyx.h2
-rw-r--r--vp8/common/onyxc_int.h4
-rw-r--r--vp8/common/postproc.c219
-rw-r--r--vp8/common/postproc.h5
-rw-r--r--vp8/common/rtcd_defs.sh9
-rw-r--r--vp8/common/sad_c.c2
-rw-r--r--vp8/common/x86/mfqe_sse2.asm281
-rw-r--r--vp8/decoder/detokenize.c482
-rw-r--r--vp8/encoder/denoising.c212
-rw-r--r--vp8/encoder/denoising.h33
-rw-r--r--vp8/encoder/encodeframe.c7
-rw-r--r--vp8/encoder/encodemv.c2
-rw-r--r--vp8/encoder/firstpass.c28
-rw-r--r--vp8/encoder/lookahead.c34
-rw-r--r--vp8/encoder/lookahead.h5
-rw-r--r--vp8/encoder/onyx_if.c60
-rw-r--r--vp8/encoder/onyx_int.h9
-rw-r--r--vp8/encoder/pickinter.c151
-rw-r--r--vp8/encoder/rdopt.c515
-rw-r--r--vp8/encoder/temporal_filter.c3
-rw-r--r--vp8/encoder/tokenize.c6
-rw-r--r--vp8/encoder/treewriter.h10
-rw-r--r--vp8/vp8_common.mk2
-rw-r--r--vp8/vp8_cx_iface.c8
-rw-r--r--vp8/vp8cx.mk2
30 files changed, 1601 insertions, 808 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index b606aaca0..919ef499a 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -37,14 +37,15 @@ static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
int i;
-
for (i = 0; i < NUM_YV12_BUFFERS; i++)
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
+#if CONFIG_POSTPROC
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
if (oci->post_proc_buffer_int_used)
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
+#endif
vpx_free(oci->above_context);
vpx_free(oci->mip);
@@ -97,6 +98,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
return 1;
}
+#if CONFIG_POSTPROC
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
@@ -104,6 +106,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
}
oci->post_proc_buffer_int_used = 0;
+ vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
+ vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
+#endif
oci->mb_rows = height >> 4;
oci->mb_cols = width >> 4;
diff --git a/vp8/common/asm_com_offsets.c b/vp8/common/asm_com_offsets.c
index 5cf151980..ae22b5f6b 100644
--- a/vp8/common/asm_com_offsets.c
+++ b/vp8/common/asm_com_offsets.c
@@ -15,6 +15,10 @@
#include "vpx_scale/yv12config.h"
#include "vp8/common/blockd.h"
+#if CONFIG_POSTPROC
+#include "postproc.h"
+#endif /* CONFIG_POSTPROC */
+
BEGIN
/* vpx_scale */
@@ -30,6 +34,11 @@ DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_b
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
+#if CONFIG_POSTPROC
+/* mfqe.c / filter_by_weight */
+DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION);
+#endif /* CONFIG_POSTPROC */
+
END
/* add asserts for any offset that is not supported by assembly code */
@@ -53,3 +62,10 @@ ct_assert(B_HU_PRED, B_HU_PRED == 9);
/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
#endif
+
+#if HAVE_SSE2
+#if CONFIG_POSTPROC
+/* vp8_filter_by_weight16x16 and 8x8 */
+ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)
+#endif /* CONFIG_POSTPROC */
+#endif /* HAVE_SSE2 */
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index c8d1bab7d..692f0ebd2 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -215,6 +215,12 @@ typedef struct macroblockd
MODE_INFO *mode_info_context;
int mode_info_stride;
+#if CONFIG_TEMPORAL_DENOISING
+ MB_PREDICTION_MODE best_sse_inter_mode;
+ int_mv best_sse_mv;
+ unsigned char need_to_clamp_best_mvs;
+#endif
+
FRAME_TYPE frame_type;
int up_available;
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 60a7ff262..8235f6e9f 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -15,7 +15,7 @@
typedef unsigned char uc;
-static __inline signed char vp8_signed_char_clamp(int t)
+static signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
@@ -24,9 +24,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_filter_mask(uc limit, uc blimit,
- uc p3, uc p2, uc p1, uc p0,
- uc q0, uc q1, uc q2, uc q3)
+static signed char vp8_filter_mask(uc limit, uc blimit,
+ uc p3, uc p2, uc p1, uc p0,
+ uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
mask |= (abs(p3 - p2) > limit);
@@ -40,7 +40,7 @@ static __inline signed char vp8_filter_mask(uc limit, uc blimit,
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
+static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
@@ -48,7 +48,7 @@ static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
return hev;
}
-static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
+static void vp8_filter(signed char mask, uc hev, uc *op1,
uc *op0, uc *oq0, uc *oq1)
{
@@ -158,7 +158,7 @@ void vp8_loop_filter_vertical_edge_c
while (++i < count * 8);
}
-static __inline void vp8_mbfilter(signed char mask, uc hev,
+static void vp8_mbfilter(signed char mask, uc hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
@@ -279,7 +279,7 @@ void vp8_mbloop_filter_vertical_edge_c
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
+static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
{
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
@@ -289,7 +289,7 @@ static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q
return mask;
}
-static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char vp8_filter, Filter1, Filter2;
signed char p1 = (signed char) * op1 ^ 0x80;
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
new file mode 100644
index 000000000..84e336915
--- /dev/null
+++ b/vp8/common/mfqe.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* MFQE: Multiframe Quality Enhancement
+ * In rate limited situations keyframes may cause significant visual artifacts
+ * commonly referred to as "popping." This file implements a postprocessing
+ * algorithm which blends data from the preceding frame when there is no
+ * motion and the q from the previous frame is lower which indicates that it is
+ * higher quality.
+ */
+
+#include "postproc.h"
+#include "variance.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_rtcd.h"
+#include "vpx_scale/yv12config.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+
+/* Blend a block_size x block_size block of src into dst, in place.
+ *
+ * Each output pixel is
+ *   (src * src_weight + dst * dst_weight + rounding_bit) >> MFQE_PRECISION
+ * with dst_weight = (1 << MFQE_PRECISION) - src_weight, so the two weights
+ * always sum to 1 << MFQE_PRECISION.
+ *
+ * src, src_stride: source block and the distance between its rows.
+ * dst, dst_stride: destination block (read and overwritten) and its row
+ *                  distance.
+ * block_size:      width and height of the square block, in pixels.
+ * src_weight:      weight given to src.
+ */
+static void filter_by_weight(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int block_size, int src_weight)
+{
+ int dst_weight = (1 << MFQE_PRECISION) - src_weight;
+ int rounding_bit = 1 << (MFQE_PRECISION - 1); /* half of the divisor */
+ int r, c;
+
+ for (r = 0; r < block_size; r++)
+ {
+ for (c = 0; c < block_size; c++)
+ {
+ dst[c] = (src[c] * src_weight +
+ dst[c] * dst_weight +
+ rounding_bit) >> MFQE_PRECISION;
+ }
+ /* advance both pointers to the next row of the block */
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+/* C version of the 16x16 weighted blend: forwards to filter_by_weight()
+ * with a block size of 16. dst is updated in place.
+ */
+void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int src_weight)
+{
+ filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
+}
+
+/* C version of the 8x8 weighted blend: forwards to filter_by_weight()
+ * with a block size of 8. dst is updated in place.
+ */
+void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int src_weight)
+{
+ filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
+}
+
+/* C version of the 4x4 weighted blend: forwards to filter_by_weight()
+ * with a block size of 4. dst is updated in place.
+ */
+void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride,
+ int src_weight)
+{
+ filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
+}
+
+/* Blend one luma block and its two chroma blocks from the source planes
+ * into the destination planes, using src_weight as the source weight.
+ *
+ * block_size == 16: 16x16 luma with 8x8 U and V.
+ * any other value:  8x8 luma with 4x4 U and V (only 8 is intended).
+ *
+ * U and V share uv_src_stride on the source side and uv_dst_stride on the
+ * destination side. The destination blocks are modified in place.
+ */
+static void apply_ifactor(unsigned char *y_src,
+ int y_src_stride,
+ unsigned char *y_dst,
+ int y_dst_stride,
+ unsigned char *u_src,
+ unsigned char *v_src,
+ int uv_src_stride,
+ unsigned char *u_dst,
+ unsigned char *v_dst,
+ int uv_dst_stride,
+ int block_size,
+ int src_weight)
+{
+ if (block_size == 16)
+ {
+ vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
+ vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
+ vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
+ }
+ else /* if (block_size == 8) */
+ {
+ vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
+ vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
+ vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
+ }
+}
+
+/* Enhance one block of the destination planes (yd/ud/vd) using the matching
+ * block of the source planes (y/u/v).
+ *
+ * A per-pixel SAD between the source and destination luma blocks is compared
+ * with a threshold built from the q difference, the destination luma
+ * activity and qprev:
+ *   - sad <  thr: the source is blended into the destination with a weight
+ *                 proportional to sad / thr; a weight of zero leaves the
+ *                 destination untouched.
+ *   - sad >= thr: the source block is copied over the destination block.
+ *
+ * qcurr, qprev: q index of the current and of the previous frame.
+ * y, u, v:      source block pointers, with strides y_stride / uv_stride.
+ * yd, ud, vd:   destination block pointers, with strides yd_stride /
+ *               uvd_stride; these are read and written.
+ */
+static void multiframe_quality_enhance_block
+(
+ int blksize, /* Currently only values supported are 16 and 8 */
+ int qcurr,
+ int qprev,
+ unsigned char *y,
+ unsigned char *u,
+ unsigned char *v,
+ int y_stride,
+ int uv_stride,
+ unsigned char *yd,
+ unsigned char *ud,
+ unsigned char *vd,
+ int yd_stride,
+ int uvd_stride
+)
+{
+ /* All-zero reference row; passed with a stride of 0 so that every row of
+  * the block is compared against zeros. */
+ static const unsigned char VP8_ZEROS[16]=
+ {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+ };
+
+ int uvblksize = blksize >> 1; /* chroma block is half the luma size */
+ int qdiff = qcurr - qprev;
+
+ int i;
+ unsigned char *up;
+ unsigned char *udp;
+ unsigned char *vp;
+ unsigned char *vdp;
+
+ unsigned int act, sad, thr, sse;
+
+ /* act: variance of the destination luma block against zeros, and
+  * sad: SAD between source and destination luma, both divided (with
+  * rounding) by the number of pixels in the block: 256 or 64. */
+ if (blksize == 16)
+ {
+ act = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+ sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8;
+ }
+ else /* if (blksize == 8) */
+ {
+ act = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+ sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6;
+ }
+
+ /* thr = qdiff/8 + log2(act) + log4(qprev) */
+ /* NOTE(review): thr is unsigned, so a negative qdiff would wrap here;
+  * presumably callers only run this when qcurr exceeds qprev - confirm. */
+ thr = (qdiff>>3);
+ while (act>>=1) thr++;
+ while (qprev>>=2) thr++;
+
+ if (sad < thr)
+ {
+ /* sad < thr, so the weight is below 1 << MFQE_PRECISION; it is then
+  * reduced further as the q difference grows. */
+ int ifactor = (sad << MFQE_PRECISION) / thr;
+ ifactor >>= (qdiff >> 5);
+
+ if (ifactor)
+ {
+ apply_ifactor(y, y_stride, yd, yd_stride,
+ u, v, uv_stride,
+ ud, vd, uvd_stride,
+ blksize, ifactor);
+ }
+ /* else implicitly copy from previous frame */
+ }
+ else
+ {
+ /* Blocks differ too much: take the source block unchanged. */
+ if (blksize == 16)
+ {
+ vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
+ vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
+ vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
+ }
+ else /* if (blksize == 8) */
+ {
+ vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
+ /* the chroma blocks are copied row by row, uvblksize bytes per row */
+ for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
+ vpx_memcpy(udp, up, uvblksize);
+ for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
+ vpx_memcpy(vdp, vp, uvblksize);
+ }
+ }
+}
+
+/* Run multiframe quality enhancement over a whole frame.
+ *
+ * Walks every macroblock of cm->frame_to_show and updates the co-located
+ * macroblock in cm->post_proc_buffer:
+ *   - key frames, and inter-frame macroblocks whose motion vector components
+ *     are both within +/-10, go through multiframe_quality_enhance_block(),
+ *     as four 8x8 blocks for B_PRED / SPLITMV and as one 16x16 block
+ *     otherwise;
+ *   - all other macroblocks are copied from frame_to_show unchanged.
+ *
+ * The q indices come from cm->base_qindex (current) and
+ * cm->postproc_state.last_base_qindex (previous).
+ */
+void vp8_multiframe_quality_enhance
+(
+ VP8_COMMON *cm
+)
+{
+ YV12_BUFFER_CONFIG *show = cm->frame_to_show;
+ YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
+
+ FRAME_TYPE frame_type = cm->frame_type;
+ /* Point at the base of the MB MODE_INFO list, which holds the motion vectors etc. */
+ const MODE_INFO *mode_info_context = cm->mi;
+ int mb_row;
+ int mb_col;
+ int qcurr = cm->base_qindex;
+ int qprev = cm->postproc_state.last_base_qindex;
+
+ unsigned char *y_ptr, *u_ptr, *v_ptr;
+ unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
+
+ /* Set up the buffer pointers */
+ y_ptr = show->y_buffer;
+ u_ptr = show->u_buffer;
+ v_ptr = show->v_buffer;
+ yd_ptr = dest->y_buffer;
+ ud_ptr = dest->u_buffer;
+ vd_ptr = dest->v_buffer;
+
+ /* postprocess each macro block */
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+ {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ /* if motion is high there will likely be no benefit */
+ if (((frame_type == INTER_FRAME &&
+ abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 &&
+ abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) ||
+ (frame_type == KEY_FRAME)))
+ {
+ if (mode_info_context->mbmi.mode == B_PRED || mode_info_context->mbmi.mode == SPLITMV)
+ {
+ /* process the macroblock as a 2x2 grid of 8x8 luma blocks,
+  * each paired with a 4x4 block of U and of V */
+ int i, j;
+ for (i=0; i<2; ++i)
+ for (j=0; j<2; ++j)
+ multiframe_quality_enhance_block(8, qcurr, qprev,
+ y_ptr + 8*(i*show->y_stride+j),
+ u_ptr + 4*(i*show->uv_stride+j),
+ v_ptr + 4*(i*show->uv_stride+j),
+ show->y_stride,
+ show->uv_stride,
+ yd_ptr + 8*(i*dest->y_stride+j),
+ ud_ptr + 4*(i*dest->uv_stride+j),
+ vd_ptr + 4*(i*dest->uv_stride+j),
+ dest->y_stride,
+ dest->uv_stride);
+ }
+ else
+ {
+ multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
+ u_ptr, v_ptr,
+ show->y_stride,
+ show->uv_stride,
+ yd_ptr, ud_ptr, vd_ptr,
+ dest->y_stride,
+ dest->uv_stride);
+ }
+ }
+ else
+ {
+ /* high-motion inter macroblock: copy the shown frame through */
+ vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
+ vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
+ vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
+ }
+ /* step one macroblock to the right: 16 luma and 8 chroma pixels */
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+ yd_ptr += 16;
+ ud_ptr += 8;
+ vd_ptr += 8;
+ mode_info_context++; /* step to next MB */
+ }
+
+ /* rewind to the start of the row, then move down one macroblock row */
+ y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
+ u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+ v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+ yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
+ ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+ vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+
+ mode_info_context++; /* Skip border mb */
+ }
+}
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index eb7d5458d..4c39b49f0 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -72,7 +72,7 @@ extern "C"
#include <assert.h>
- static __inline void Scale2Ratio(int mode, int *hr, int *hs)
+ static void Scale2Ratio(int mode, int *hr, int *hs)
{
switch (mode)
{
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 84cf3b340..7743ed5c5 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -92,11 +92,13 @@ typedef struct VP8Common
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
- YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
+#if CONFIG_POSTPROC
+ YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
int post_proc_buffer_int_used;
+#endif
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 280ce0294..50ed54309 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -17,7 +17,6 @@
#include "vpx_scale/yv12extend.h"
#include "vpx_scale/vpxscale.h"
#include "systemdependent.h"
-#include "variance.h"
#include <limits.h>
#include <math.h>
@@ -30,7 +29,6 @@
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
/* global constants */
-#define MFQE_PRECISION 4
#if CONFIG_POSTPROC_VISUALIZER
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{
@@ -362,6 +360,7 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source,
vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
}
+#if !(CONFIG_TEMPORAL_DENOISING)
void vp8_de_noise(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
@@ -398,6 +397,7 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source,
source->uv_width - 4, ppl);
}
+#endif
double vp8_gaussian(double sigma, double mu, double x)
{
@@ -693,214 +693,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
}
}
-
-static void multiframe_quality_enhance_block
-(
- int blksize, /* Currently only values supported are 16, 8, 4 */
- int qcurr,
- int qprev,
- unsigned char *y,
- unsigned char *u,
- unsigned char *v,
- int y_stride,
- int uv_stride,
- unsigned char *yd,
- unsigned char *ud,
- unsigned char *vd,
- int yd_stride,
- int uvd_stride
-)
-{
- static const unsigned char VP8_ZEROS[16]=
- {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
- };
- int blksizeby2 = blksize >> 1;
- int qdiff = qcurr - qprev;
-
- int i, j;
- unsigned char *yp;
- unsigned char *ydp;
- unsigned char *up;
- unsigned char *udp;
- unsigned char *vp;
- unsigned char *vdp;
-
- unsigned int act, sse, sad, thr;
- if (blksize == 16)
- {
- act = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
- sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8;
- }
- else if (blksize == 8)
- {
- act = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
- sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6;
- }
- else
- {
- act = (vp8_variance4x4(yd, yd_stride, VP8_ZEROS, 0, &sse)+8)>>4;
- sad = (vp8_sad4x4(y, y_stride, yd, yd_stride, INT_MAX)+8)>>4;
- }
- /* thr = qdiff/8 + log2(act) + log4(qprev) */
- thr = (qdiff>>3);
- while (act>>=1) thr++;
- while (qprev>>=2) thr++;
- if (sad < thr)
- {
- static const int roundoff = (1 << (MFQE_PRECISION - 1));
- int ifactor = (sad << MFQE_PRECISION) / thr;
- ifactor >>= (qdiff >> 5);
- // TODO: SIMD optimize this section
- if (ifactor)
- {
- int icfactor = (1 << MFQE_PRECISION) - ifactor;
- for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
- {
- for (j = 0; j < blksize; ++j)
- ydp[j] = (int)((yp[j] * ifactor + ydp[j] * icfactor + roundoff) >> MFQE_PRECISION);
- }
- for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
- {
- for (j = 0; j < blksizeby2; ++j)
- udp[j] = (int)((up[j] * ifactor + udp[j] * icfactor + roundoff) >> MFQE_PRECISION);
- }
- for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
- {
- for (j = 0; j < blksizeby2; ++j)
- vdp[j] = (int)((vp[j] * ifactor + vdp[j] * icfactor + roundoff) >> MFQE_PRECISION);
- }
- }
- }
- else
- {
- if (blksize == 16)
- {
- vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
- vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
- vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
- }
- else if (blksize == 8)
- {
- vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
- for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
- vpx_memcpy(udp, up, blksizeby2);
- for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
- vpx_memcpy(vdp, vp, blksizeby2);
- }
- else
- {
- for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride)
- vpx_memcpy(ydp, yp, blksize);
- for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride)
- vpx_memcpy(udp, up, blksizeby2);
- for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride)
- vpx_memcpy(vdp, vp, blksizeby2);
- }
- }
-}
-
-void vp8_multiframe_quality_enhance
-(
- VP8_COMMON *cm
-)
-{
- YV12_BUFFER_CONFIG *show = cm->frame_to_show;
- YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
-
- FRAME_TYPE frame_type = cm->frame_type;
- /* Point at base of Mb MODE_INFO list has motion vectors etc */
- const MODE_INFO *mode_info_context = cm->mi;
- int mb_row;
- int mb_col;
- int qcurr = cm->base_qindex;
- int qprev = cm->postproc_state.last_base_qindex;
-
- unsigned char *y_ptr, *u_ptr, *v_ptr;
- unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
-
- /* Set up the buffer pointers */
- y_ptr = show->y_buffer;
- u_ptr = show->u_buffer;
- v_ptr = show->v_buffer;
- yd_ptr = dest->y_buffer;
- ud_ptr = dest->u_buffer;
- vd_ptr = dest->v_buffer;
-
- /* postprocess each macro block */
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
- {
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
- {
- /* if motion is high there will likely be no benefit */
- if (((frame_type == INTER_FRAME &&
- abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 &&
- abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) ||
- (frame_type == KEY_FRAME)))
- {
- if (mode_info_context->mbmi.mode == B_PRED || mode_info_context->mbmi.mode == SPLITMV)
- {
- int i, j;
- for (i=0; i<2; ++i)
- for (j=0; j<2; ++j)
- multiframe_quality_enhance_block(8,
- qcurr,
- qprev,
- y_ptr + 8*(i*show->y_stride+j),
- u_ptr + 4*(i*show->uv_stride+j),
- v_ptr + 4*(i*show->uv_stride+j),
- show->y_stride,
- show->uv_stride,
- yd_ptr + 8*(i*dest->y_stride+j),
- ud_ptr + 4*(i*dest->uv_stride+j),
- vd_ptr + 4*(i*dest->uv_stride+j),
- dest->y_stride,
- dest->uv_stride);
- }
- else
- {
- multiframe_quality_enhance_block(16,
- qcurr,
- qprev,
- y_ptr,
- u_ptr,
- v_ptr,
- show->y_stride,
- show->uv_stride,
- yd_ptr,
- ud_ptr,
- vd_ptr,
- dest->y_stride,
- dest->uv_stride);
-
- }
- }
- else
- {
- vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
- vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
- vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
- }
- y_ptr += 16;
- u_ptr += 8;
- v_ptr += 8;
- yd_ptr += 16;
- ud_ptr += 8;
- vd_ptr += 8;
- mode_info_context++; /* step to next MB */
- }
-
- y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
- u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
- v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
- yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
- ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
- vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
-
- mode_info_context++; /* Skip border mb */
- }
-}
-
+#if CONFIG_POSTPROC
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
{
int q = oci->filter_level * 10 / 6;
@@ -923,6 +716,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
dest->y_height = oci->Height;
dest->uv_height = dest->y_height / 2;
oci->postproc_state.last_base_qindex = oci->base_qindex;
+ oci->postproc_state.last_frame_valid = 1;
return 0;
}
@@ -943,7 +737,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
// insure that postproc is set to all 0's so that post proc
// doesn't pull random data in from edge
- vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,126,(&oci->post_proc_buffer)->frame_size);
+ vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
}
}
@@ -953,6 +747,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
#endif
if ((flags & VP8D_MFQE) &&
+ oci->postproc_state.last_frame_valid &&
oci->current_video_frame >= 2 &&
oci->base_qindex - oci->postproc_state.last_base_qindex >= 10)
{
@@ -992,6 +787,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
vp8_yv12_copy_frame_ptr(oci->frame_to_show, &oci->post_proc_buffer);
oci->postproc_state.last_base_qindex = oci->base_qindex;
}
+ oci->postproc_state.last_frame_valid = 1;
if (flags & VP8D_ADDNOISE)
{
@@ -1378,3 +1174,4 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
dest->uv_height = dest->y_height / 2;
return 0;
}
+#endif
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 1db74379f..6ac788cbd 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -19,6 +19,7 @@ struct postproc_state
int last_noise;
char noise[3072];
int last_base_qindex;
+ int last_frame_valid;
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
@@ -40,4 +41,8 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source,
int q,
int low_var_thresh,
int flag);
+
+#define MFQE_PRECISION 4
+
+void vp8_multiframe_quality_enhance(struct VP8Common *cm);
#endif
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index ff8e30c3f..0fdb4fa00 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -166,6 +166,15 @@ if [ "$CONFIG_POSTPROC" = "yes" ]; then
prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
# no asm yet
+
+ prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
+ specialize vp8_filter_by_weight16x16 sse2
+
+ prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
+ specialize vp8_filter_by_weight8x8 sse2
+
+ prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
+ # no asm yet
fi
#
diff --git a/vp8/common/sad_c.c b/vp8/common/sad_c.c
index f745bbd3d..6a3e889b1 100644
--- a/vp8/common/sad_c.c
+++ b/vp8/common/sad_c.c
@@ -13,7 +13,7 @@
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
-static __inline
+static
unsigned int sad_mx_n_c(
const unsigned char *src_ptr,
int src_stride,
diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm
new file mode 100644
index 000000000..10d21f320
--- /dev/null
+++ b/vp8/common/x86/mfqe_sse2.asm
@@ -0,0 +1,281 @@
+;
+; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+; Blend a 16x16 block of src into dst, in place, one 16-pixel row per pass:
+;   dst = (src * src_weight + dst * (16 - src_weight) + 8) >> 4
+; The constants 16, 8 and 4 are tMFQE, tMFQE_round and the psrlw count; they
+; hard-code MFQE_PRECISION == 4.
+;
+;void vp8_filter_by_weight16x16_sse2
+;(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride,
+; int src_weight
+;)
+global sym(vp8_filter_by_weight16x16_sse2)
+sym(vp8_filter_by_weight16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movd xmm0, arg(4) ; src_weight
+ pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
+ punpcklqdq xmm0, xmm0 ; replicate to all hi words
+
+ movdqa xmm1, [GLOBAL(tMFQE)] ; 8 words of 1 << MFQE_PRECISION
+ psubw xmm1, xmm0 ; dst_weight
+
+ mov rax, arg(0) ; src
+ mov rsi, arg(1) ; src_stride
+ mov rdx, arg(2) ; dst
+ mov rdi, arg(3) ; dst_stride
+
+ mov rcx, 16 ; loop count
+ pxor xmm6, xmm6 ; zero, used to widen bytes to words
+
+.combine
+ movdqa xmm2, [rax] ; 16 src pixels (aligned load)
+ movdqa xmm4, [rdx] ; 16 dst pixels (aligned load)
+ add rax, rsi
+
+ ; src * src_weight
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm6
+ punpckhbw xmm3, xmm6
+ pmullw xmm2, xmm0
+ pmullw xmm3, xmm0
+
+ ; dst * dst_weight
+ movdqa xmm5, xmm4
+ punpcklbw xmm4, xmm6
+ punpckhbw xmm5, xmm6
+ pmullw xmm4, xmm1
+ pmullw xmm5, xmm1
+
+ ; sum, round and shift
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+ paddw xmm2, [GLOBAL(tMFQE_round)]
+ paddw xmm3, [GLOBAL(tMFQE_round)]
+ psrlw xmm2, 4 ; >> MFQE_PRECISION
+ psrlw xmm3, 4 ; >> MFQE_PRECISION
+
+ packuswb xmm2, xmm3 ; words back to 16 bytes
+ movdqa [rdx], xmm2 ; overwrite the dst row
+ add rdx, rdi
+
+ dec rcx
+ jnz .combine
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+
+ ret
+
+; Blend an 8x8 block of src into dst, in place, one 8-pixel row per pass:
+;   dst = (src * src_weight + dst * (16 - src_weight) + 8) >> 4
+; The constants 16, 8 and 4 are tMFQE, tMFQE_round and the psrlw count; they
+; hard-code MFQE_PRECISION == 4.
+;
+;void vp8_filter_by_weight8x8_sse2
+;(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride,
+; int src_weight
+;)
+global sym(vp8_filter_by_weight8x8_sse2)
+sym(vp8_filter_by_weight8x8_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 5
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ movd xmm0, arg(4) ; src_weight
+ pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
+ punpcklqdq xmm0, xmm0 ; replicate to all hi words
+
+ movdqa xmm1, [GLOBAL(tMFQE)] ; 8 words of 1 << MFQE_PRECISION
+ psubw xmm1, xmm0 ; dst_weight
+
+ mov rax, arg(0) ; src
+ mov rsi, arg(1) ; src_stride
+ mov rdx, arg(2) ; dst
+ mov rdi, arg(3) ; dst_stride
+
+ mov rcx, 8 ; loop count
+ pxor xmm4, xmm4 ; zero, used to widen bytes to words
+
+.combine
+ movq xmm2, [rax] ; 8 src pixels
+ movq xmm3, [rdx] ; 8 dst pixels
+ add rax, rsi
+
+ ; src * src_weight
+ punpcklbw xmm2, xmm4
+ pmullw xmm2, xmm0
+
+ ; dst * dst_weight
+ punpcklbw xmm3, xmm4
+ pmullw xmm3, xmm1
+
+ ; sum, round and shift
+ paddw xmm2, xmm3
+ paddw xmm2, [GLOBAL(tMFQE_round)]
+ psrlw xmm2, 4 ; >> MFQE_PRECISION
+
+ packuswb xmm2, xmm4 ; words back to bytes in the low 8 bytes
+ movq [rdx], xmm2 ; overwrite the dst row
+ add rdx, rdi
+
+ dec rcx
+ jnz .combine
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+
+ ret
+
+; Compute, in one pass over two 16x16 blocks:
+;   *sad      = (SAD(src1, src2) + 128) >> 8
+;   *variance = (sum(src2^2) - (sum(src2)^2 >> 8) + 128) >> 8
+; NOTE(review): within the visible part of this change nothing calls this
+; routine and no prototype is added for it in rtcd_defs.sh - confirm whether
+; it is wired up elsewhere.
+;
+;void vp8_variance_and_sad_16x16_sse2 | arg
+;(
+; unsigned char *src1, 0
+; int stride1, 1
+; unsigned char *src2, 2
+; int stride2, 3
+; unsigned int *variance, 4
+; unsigned int *sad, 5
+;)
+global sym(vp8_variance_and_sad_16x16_sse2)
+sym(vp8_variance_and_sad_16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rax, arg(0) ; src1
+ mov rcx, arg(1) ; stride1
+ mov rdx, arg(2) ; src2
+ mov rdi, arg(3) ; stride2
+
+ mov rsi, 16 ; block height
+
+ ; Prep accumulator registers
+ pxor xmm3, xmm3 ; SAD
+ pxor xmm4, xmm4 ; sum of src2
+ pxor xmm5, xmm5 ; sum of src2^2
+
+ ; Because we're working with the actual output frames
+ ; we can't depend on any kind of data alignment.
+ ; NOTE(review): the two loads below are movdqa, which requires 16-byte
+ ; aligned addresses; that contradicts the statement above. Confirm
+ ; that both blocks are always aligned here, or switch to movdqu.
+.accumulate
+ movdqa xmm0, [rax] ; src1
+ movdqa xmm1, [rdx] ; src2
+ add rax, rcx ; src1 + stride1
+ add rdx, rdi ; src2 + stride2
+
+ ; SAD(src1, src2)
+ psadbw xmm0, xmm1
+ paddusw xmm3, xmm0
+
+ ; SUM(src2)
+ pxor xmm2, xmm2
+ psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0
+ paddusw xmm4, xmm2
+
+ ; pmaddubsw would be ideal if it took two unsigned values. instead,
+ ; it expects a signed and an unsigned value. so instead we zero extend
+ ; and operate on words.
+ pxor xmm2, xmm2
+ movdqa xmm0, xmm1
+ punpcklbw xmm0, xmm2
+ punpckhbw xmm1, xmm2
+ pmaddwd xmm0, xmm0
+ pmaddwd xmm1, xmm1
+ paddd xmm5, xmm0
+ paddd xmm5, xmm1
+
+ sub rsi, 1
+ jnz .accumulate
+
+ ; phaddd only operates on adjacent double words.
+ ; Finalize SAD and store
+ movdqa xmm0, xmm3
+ psrldq xmm0, 8 ; bring the high-half partial sum down
+ paddusw xmm0, xmm3
+ paddd xmm0, [GLOBAL(t128)] ; (SAD + 128) >> 8
+ psrld xmm0, 8
+
+ mov rax, arg(5)
+ movd [rax], xmm0
+
+ ; Accumulate sum of src2
+ movdqa xmm0, xmm4
+ psrldq xmm0, 8
+ paddusw xmm0, xmm4
+ ; Square src2. Ignore high value
+ pmuludq xmm0, xmm0
+ psrld xmm0, 8 ; sum(src2)^2 / 256
+
+ ; phaddw could be used to sum adjacent values but we want
+ ; all the values summed. promote to doubles, accumulate,
+ ; shift and sum
+ pxor xmm2, xmm2
+ movdqa xmm1, xmm5
+ punpckldq xmm1, xmm2
+ punpckhdq xmm5, xmm2
+ paddd xmm1, xmm5
+ movdqa xmm2, xmm1
+ psrldq xmm1, 8
+ paddd xmm1, xmm2
+
+ psubd xmm1, xmm0 ; sum of squares - sum^2 / 256
+
+ ; (variance + 128) >> 8
+ paddd xmm1, [GLOBAL(t128)]
+ psrld xmm1, 8
+ mov rax, arg(4)
+
+ movd [rax], xmm1
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+t128:
+ ddq 128
+align 16
+tMFQE: ; 1 << MFQE_PRECISION
+ times 8 dw 0x10
+align 16
+tMFQE_round: ; 1 << (MFQE_PRECISION - 1)
+ times 8 dw 0x08
+
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index ba94c58bb..c5752ee0b 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -15,58 +15,6 @@
#include "vpx_ports/mem.h"
#include "detokenize.h"
-#define BOOL_DATA unsigned char
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
-{
- 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
- 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
- 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
- 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
-};
-#define EOB_CONTEXT_NODE 0
-#define ZERO_CONTEXT_NODE 1
-#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 3
-#define TWO_CONTEXT_NODE 4
-#define THREE_CONTEXT_NODE 5
-#define HIGH_LOW_CONTEXT_NODE 6
-#define CAT_ONE_CONTEXT_NODE 7
-#define CAT_THREEFOUR_CONTEXT_NODE 8
-#define CAT_THREE_CONTEXT_NODE 9
-#define CAT_FIVE_CONTEXT_NODE 10
-
-#define CAT1_MIN_VAL 5
-#define CAT2_MIN_VAL 7
-#define CAT3_MIN_VAL 11
-#define CAT4_MIN_VAL 19
-#define CAT5_MIN_VAL 35
-#define CAT6_MIN_VAL 67
-
-#define CAT1_PROB0 159
-#define CAT2_PROB0 145
-#define CAT2_PROB1 165
-
-#define CAT3_PROB0 140
-#define CAT3_PROB1 148
-#define CAT3_PROB2 173
-
-#define CAT4_PROB0 135
-#define CAT4_PROB1 140
-#define CAT4_PROB2 155
-#define CAT4_PROB3 176
-
-#define CAT5_PROB0 130
-#define CAT5_PROB1 134
-#define CAT5_PROB2 141
-#define CAT5_PROB3 157
-#define CAT5_PROB4 180
-
-static const unsigned char cat6_prob[12] =
-{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
-
-
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
/* Clear entropy contexts for Y2 blocks */
@@ -83,302 +31,216 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
}
}
-DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
-#define FILL \
- if(count < 0) \
- VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+/*
+ ------------------------------------------------------------------------------
+ Residual decoding (Paragraph 13.2 / 13.3)
+*/
+static const uint8_t kBands[16 + 1] = {
+ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+ 0 /* extra entry as sentinel */
+};
-#define NORMALIZE \
- /*if(range < 0x80)*/ \
- { \
- shift = vp8_norm[range]; \
- range <<= shift; \
- value <<= shift; \
- count -= shift; \
- }
+static const uint8_t kCat3[] = { 173, 148, 140, 0 };
+static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat6[] =
+ { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t kZigzag[16] = {
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
- split = (range + 1) >> 1; \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- v= value_to_sign; \
- } \
- else \
- { \
- range = range-split; \
- value = value-bigsplit; \
- v = -value_to_sign; \
- } \
- range +=range; \
- value +=value; \
- count--;
+#define VP8GetBit vp8dx_decode_bool
+#define NUM_PROBAS 11
+#define NUM_CTX 3
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) )>> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- NORMALIZE \
- goto branch; \
- } \
- value -= bigsplit; \
- range = range - split; \
- NORMALIZE \
- }
+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
- { \
- split = 1 + ((( probability*(range-1) ) ) >> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if ( value < bigsplit ) \
- { \
- range = split; \
- NORMALIZE \
- Prob = coef_probs; \
- if(c<15) {\
- ++c; \
- Prob += coef_bands_x[c]; \
- goto branch; \
- } goto BLOCK_FINISHED; /*for malformed input */\
- } \
- value -= bigsplit; \
- range = range - split; \
- NORMALIZE \
- }
+static int GetSigned(BOOL_DECODER *br, int value_to_sign)
+{
+ int split = (br->range + 1) >> 1;
+ VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
+ int v;
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
- DECODE_AND_APPLYSIGN(val) \
- Prob = coef_probs + (ENTROPY_NODES*2); \
- if(c < 15){\
- qcoeff_ptr [ scan[c] ] = (int16_t) v; \
- ++c; \
- goto DO_WHILE; }\
- qcoeff_ptr [ 15 ] = (int16_t) v; \
- goto BLOCK_FINISHED;
+ if(br->count < 0)
+ vp8dx_bool_decoder_fill(br);
+ if ( br->value < bigsplit )
+ {
+ br->range = split;
+ v= value_to_sign;
+ }
+ else
+ {
+ br->range = br->range-split;
+ br->value = br->value-bigsplit;
+ v = -value_to_sign;
+ }
+ br->range +=br->range;
+ br->value +=br->value;
+ br->count--;
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
- split = 1 + (((range-1) * prob) >> 8); \
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
- FILL \
- if(value >= bigsplit)\
- {\
- range = range-split;\
- value = value-bigsplit;\
- val += ((uint16_t)1<<bits_count);\
- }\
- else\
- {\
- range = split;\
- }\
- NORMALIZE
+ return v;
+}
+/*
+ Decodes one block of coefficient tokens from br into out, starting at position n with context ctx. Returns the position of the last decoded coeff plus one
+ (and 0 if there's no coeff at all; 16 when a run of zero tokens reaches the end of the block)
+*/
+static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob,
+ int ctx, int n, int16_t* out)
+{
+ const uint8_t* p = prob[n][ctx]; /* indexed by n directly; the callers in this file pass n = 0 or 1, where kBands[n] == n */
+ if (!VP8GetBit(br, p[0]))
+ { /* first EOB is more a 'CBP' bit. */
+ return 0;
+ }
+ while (1)
+ {
+ ++n; /* n is now one past the position being decoded, hence kZigzag[n - 1] below */
+ if (!VP8GetBit(br, p[1]))
+ {
+ p = prob[kBands[n]][0]; /* zero token: next position is decoded with context 0 */
+ }
+ else
+ { /* non zero coeff */
+ int v, j;
+ if (!VP8GetBit(br, p[2]))
+ {
+ p = prob[kBands[n]][1]; /* magnitude 1: next position uses context 1 */
+ v = 1;
+ }
+ else
+ {
+ if (!VP8GetBit(br, p[3]))
+ {
+ if (!VP8GetBit(br, p[4]))
+ {
+ v = 2;
+ }
+ else
+ {
+ v = 3 + VP8GetBit(br, p[5]); /* 3 or 4 */
+ }
+ }
+ else
+ {
+ if (!VP8GetBit(br, p[6]))
+ {
+ if (!VP8GetBit(br, p[7]))
+ {
+ v = 5 + VP8GetBit(br, 159); /* 5..6: one extra bit with fixed probability */
+ } else
+ {
+ v = 7 + 2 * VP8GetBit(br, 165); /* 7..10: high extra bit first */
+ v += VP8GetBit(br, 145); /* then the low extra bit */
+ }
+ }
+ else
+ {
+ const uint8_t* tab;
+ const int bit1 = VP8GetBit(br, p[8]);
+ const int bit0 = VP8GetBit(br, p[9 + bit1]);
+ const int cat = 2 * bit1 + bit0; /* 0..3 selects kCat3, kCat4, kCat5 or kCat6 */
+ v = 0;
+ for (tab = kCat3456[cat]; *tab; ++tab) /* each table ends with a 0 sentinel */
+ {
+ v += v + VP8GetBit(br, *tab); /* shift in extra bits, most significant first */
+ }
+ v += 3 + (8 << cat); /* category base value: 11, 19, 35 or 67 */
+ }
+ }
+ p = prob[kBands[n]][2]; /* magnitude > 1: next position uses context 2 */
+ }
+ j = kZigzag[n - 1]; /* map scan position to the index written in out */
+
+ out[j] = GetSigned(br, v); /* reads the sign bit and applies it to v */
+
+ if (n == 16 || !VP8GetBit(br, p[0]))
+ { /* EOB */
+ return n;
+ }
+ }
+ if (n == 16) /* only reachable after a zero token: the non-zero branch has already returned when n == 16 */
+ {
+ return 16;
+ }
+ }
+}
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
- ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
- ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
- const FRAME_CONTEXT * const fc = &dx->common.fc;
-
BOOL_DECODER *bc = x->current_bc;
-
+ const FRAME_CONTEXT * const fc = &dx->common.fc;
char *eobs = x->eobs;
- ENTROPY_CONTEXT *a;
- ENTROPY_CONTEXT *l;
int i;
-
+ int nonzeros;
int eobtotal = 0;
- register int count;
-
- const BOOL_DATA *bufptr;
- const BOOL_DATA *bufend;
- register unsigned int range;
- VP8_BD_VALUE value;
- const int *scan;
- register unsigned int shift;
- unsigned int split;
- VP8_BD_VALUE bigsplit;
short *qcoeff_ptr;
+ ProbaArray coef_probs;
+ ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
+ ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
+ ENTROPY_CONTEXT *a;
+ ENTROPY_CONTEXT *l;
+ int skip_dc = 0;
- const vp8_prob *coef_probs;
- int stop;
- int val, bits_count;
- int c;
- int v;
- const vp8_prob *Prob;
- int start_coeff;
-
-
- i = 0;
- stop = 16;
-
- scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
- coef_probs = fc->coef_probs [3] [ 0 ] [0];
if (x->mode_info_context->mbmi.mode != B_PRED &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
- i = 24;
- stop = 24;
- qcoeff_ptr += 24*16;
- eobtotal -= 16;
- coef_probs = fc->coef_probs [1] [ 0 ] [0];
- }
-
- bufend = bc->user_buffer_end;
- bufptr = bc->user_buffer;
- value = bc->value;
- count = bc->count;
- range = bc->range;
-
- start_coeff = 0;
-
-BLOCK_LOOP:
- a = A + vp8_block2above[i];
- l = L + vp8_block2left[i];
-
- c = start_coeff;
+ a = a_ctx + 8;
+ l = l_ctx + 8;
- VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
+ coef_probs = fc->coef_probs [1];
- Prob = coef_probs;
- Prob += v * ENTROPY_NODES;
- *a = *l = 0;
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16);
+ *a = *l = (nonzeros > 0);
-DO_WHILE:
- Prob += coef_bands_x[c];
- DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
- *a = *l = 1;
+ eobs[24] = nonzeros;
+ eobtotal += nonzeros - 16;
-CHECK_0_:
- DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
- LOW_VAL_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
- HIGH_LOW_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
- CAT_THREEFOUR_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
- CAT_FIVE_CONTEXT_NODE_0_);
-
- val = CAT6_MIN_VAL;
- bits_count = 10;
-
- do
+ coef_probs = fc->coef_probs [0];
+ skip_dc = 1;
+ }
+ else
{
- DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
- bits_count -- ;
+ coef_probs = fc->coef_probs [3];
+ skip_dc = 0;
}
- while (bits_count >= 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
- val = CAT5_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
- CAT_THREE_CONTEXT_NODE_0_);
- val = CAT4_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
- val = CAT3_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
- CAT_ONE_CONTEXT_NODE_0_);
-
- val = CAT2_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
- val = CAT1_MIN_VAL;
- DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
- DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
- DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
- DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
- DECODE_AND_APPLYSIGN(1);
- Prob = coef_probs + ENTROPY_NODES;
-
- if (c < 15)
+ for (i = 0; i < 16; ++i)
{
- qcoeff_ptr [ scan[c] ] = (int16_t) v;
- ++c;
- goto DO_WHILE;
- }
+ a = a_ctx + (i&3);
+ l = l_ctx + ((i&0xc)>>2);
- qcoeff_ptr [ 15 ] = (int16_t) v;
-BLOCK_FINISHED:
- eobs[i] = c;
- eobtotal += c;
- qcoeff_ptr += 16;
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr);
+ *a = *l = (nonzeros > 0);
- i++;
+ nonzeros += skip_dc;
+ eobs[i] = nonzeros;
+ eobtotal += nonzeros;
+ qcoeff_ptr += 16;
+ }
- if (i < stop)
- goto BLOCK_LOOP;
+ coef_probs = fc->coef_probs [2];
- if (i == 25)
+ a_ctx += 4;
+ l_ctx += 4;
+ for (i = 16; i < 24; ++i)
{
- start_coeff = 1;
- i = 0;
- stop = 16;
- coef_probs = fc->coef_probs [0] [ 0 ] [0];
- qcoeff_ptr -= (24*16 + 16);
- goto BLOCK_LOOP;
- }
+ a = a_ctx + ((i > 19)<<1) + (i&1);
+ l = l_ctx + ((i > 19)<<1) + ((i&3)>1);
- if (i == 16)
- {
- start_coeff = 0;
- coef_probs = fc->coef_probs [2] [ 0 ] [0];
- stop = 24;
- goto BLOCK_LOOP;
+ nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr);
+ *a = *l = (nonzeros > 0);
+
+ eobs[i] = nonzeros;
+ eobtotal += nonzeros;
+ qcoeff_ptr += 16;
}
- FILL
- bc->user_buffer = bufptr;
- bc->value = value;
- bc->count = count;
- bc->range = range;
return eobtotal;
-
}
+
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
new file mode 100644
index 000000000..d487065c0
--- /dev/null
+++ b/vp8/encoder/denoising.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "denoising.h"
+
+#include "vp8/common/reconinter.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_rtcd.h"
+
+const unsigned int NOISE_MOTION_THRESHOLD = 20*20;
+const unsigned int NOISE_DIFF2_THRESHOLD = 75;
+// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
+const unsigned int SSE_DIFF_THRESHOLD = 16*16*20;
+const unsigned int SSE_THRESHOLD = 16*16*40;
+
+static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8)
+{ // Rounded weighted average: factor_q8/256 of state plus (256 - factor_q8)/256 of sample.
+ return (uint8_t)(
+ (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8
+ (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8
+ >> 8); // the +128 above rounds to nearest before the 8 fractional bits are dropped
+}
+
+static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src,
+ YV12_BUFFER_CONFIG* dst,
+ MACROBLOCK* x,
+ unsigned int best_sse,
+ unsigned int zero_mv_sse,
+ int recon_yoffset,
+ int recon_uvoffset)
+{ // Predicts this macroblock from src into dst; returns best_sse, or zero_mv_sse if the MV was reset to zero.
+ MACROBLOCKD filter_xd = x->e_mbd; // by-value copy; pointer members such as mode_info_context still alias x->e_mbd
+ int mv_col;
+ int mv_row;
+ int sse_diff = zero_mv_sse - best_sse; // how much worse zero motion is than the best MV; may be negative
+ // Compensate the running average.
+ filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset;
+ filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset;
+ filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset;
+ // Write the compensated running average to the destination buffer.
+ filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset;
+ filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset;
+ filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset;
+ // Use the best MV for the compensation.
+ filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
+ filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode;
+ filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv;
+ filter_xd.mode_info_context->mbmi.need_to_clamp_mvs =
+ filter_xd.need_to_clamp_best_mvs;
+ mv_col = filter_xd.best_sse_mv.as_mv.col;
+ mv_row = filter_xd.best_sse_mv.as_mv.row;
+ if (filter_xd.mode_info_context->mbmi.mode <= B_PRED ||
+ (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD &&
+ sse_diff < (int)SSE_DIFF_THRESHOLD)) // signed compare: a negative sse_diff must not be converted to a huge unsigned value
+ {
+ // Handle intra blocks as referring to last frame with zero motion and
+ // let the absolute pixel difference affect the filter factor.
+ // Also consider small amount of motion as being random walk due to noise,
+ // if it doesn't mean that we get a much bigger error.
+ // NOTE(review): mode_info_context is a pointer shared with x->e_mbd, so these writes reach the encoder's MODE_INFO; confirm the caller overwrites mbmi afterwards.
+ filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
+ filter_xd.mode_info_context->mbmi.mode = ZEROMV;
+ filter_xd.mode_info_context->mbmi.mv.as_int = 0;
+ x->e_mbd.best_sse_inter_mode = ZEROMV; // written to the real macroblock so the caller sees the zero MV
+ x->e_mbd.best_sse_mv.as_int = 0;
+ best_sse = zero_mv_sse;
+ }
+ if (!x->skip)
+ {
+ vp8_build_inter_predictors_mb(&filter_xd);
+ }
+ else
+ {
+ vp8_build_inter16x16_predictors_mb(&filter_xd,
+ filter_xd.dst.y_buffer,
+ filter_xd.dst.u_buffer,
+ filter_xd.dst.v_buffer,
+ filter_xd.dst.y_stride,
+ filter_xd.dst.uv_stride);
+ }
+ return best_sse;
+}
+
+static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg,
+ YV12_BUFFER_CONFIG* running_avg,
+ MACROBLOCK* signal,
+ unsigned int motion_magnitude2,
+ int y_offset,
+ int uv_offset) // uv_offset is never read in this body; only the Y plane is filtered
+{ // Filters the 16x16 block in signal->thismb against mc_running_avg, updating running_avg and thismb in place.
+ unsigned char* sig = signal->thismb;
+ int sig_stride = 16; // thismb is walked as 16 rows of 16 bytes
+ unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char* running_avg_y = running_avg->y_buffer + y_offset;
+ int avg_y_stride = running_avg->y_stride;
+ int r, c;
+ for (r = 0; r < 16; r++)
+ {
+ for (c = 0; c < 16; c++)
+ {
+ int diff;
+ int absdiff = 0;
+ unsigned int filter_coefficient;
+ absdiff = sig[c] - mc_running_avg_y[c];
+ absdiff = absdiff > 0 ? absdiff : -absdiff;
+ assert(absdiff >= 0 && absdiff < 256); // NOTE(review): <assert.h> is not among this file's visible includes; confirm it arrives via another header
+ filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); // 255 when absdiff is 0, shrinking as the pixel difference grows
+ // Allow some additional filtering of static blocks, or blocks with very
+ // small motion vectors.
+ filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3));
+ filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; // clamp so it fits blend()'s uint8_t factor
+
+ running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient);
+ diff = sig[c] - running_avg_y[c];
+
+ if (diff * diff < NOISE_DIFF2_THRESHOLD)
+ {
+ // Replace with mean to suppress the noise.
+ sig[c] = running_avg_y[c];
+ }
+ else
+ {
+ // Replace the filter state with the signal since the change in this
+ // pixel isn't classified as noise.
+ running_avg_y[c] = sig[c];
+ }
+ }
+ sig += sig_stride; // advance all three pointers to the next row
+ mc_running_avg_y += mc_avg_y_stride;
+ running_avg_y += avg_y_stride;
+ }
+}
+
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
+{ // Allocates and zeroes both denoiser frame buffers. Returns 0 on success, 1 if either allocation fails.
+ assert(denoiser);
+ denoiser->yv12_running_avg.flags = 0;
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width,
+ height, VP8BORDERINPIXELS) < 0)
+ {
+ vp8_denoiser_free(denoiser); // frees both buffers; NOTE(review): yv12_mc_running_avg has not been set up yet here — confirm the struct starts zeroed
+ return 1;
+ }
+ denoiser->yv12_mc_running_avg.flags = 0;
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
+ height, VP8BORDERINPIXELS) < 0)
+ {
+ vp8_denoiser_free(denoiser); // also releases the first buffer allocated above
+ return 1;
+ }
+ vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0,
+ denoiser->yv12_running_avg.frame_size);
+ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
+ denoiser->yv12_mc_running_avg.frame_size);
+ return 0;
+}
+
+void vp8_denoiser_free(VP8_DENOISER *denoiser)
+{ // Releases both frame buffers owned by the denoiser; the VP8_DENOISER struct itself is not freed.
+ assert(denoiser);
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg);
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
+}
+
+void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
+ MACROBLOCK *x,
+ unsigned int best_sse,
+ unsigned int zero_mv_sse,
+ int recon_yoffset,
+ int recon_uvoffset) { // Denoises one macroblock: motion-compensates the running average, then filters x->thismb or skips filtering.
+ int mv_row;
+ int mv_col;
+ unsigned int motion_magnitude2;
+ // Motion compensate the running average.
+ best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg,
+ &denoiser->yv12_mc_running_avg,
+ x,
+ best_sse,
+ zero_mv_sse,
+ recon_yoffset,
+ recon_uvoffset);
+
+ mv_row = x->e_mbd.best_sse_mv.as_mv.row; // read after compensation, which may have reset the MV to zero
+ mv_col = x->e_mbd.best_sse_mv.as_mv.col;
+ motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; // squared MV length
+ if (best_sse > SSE_THRESHOLD ||
+ motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD)
+ {
+ // No filtering of this block since it differs too much from the predictor,
+ // or the motion vector magnitude is considered too big.
+ vp8_copy_mem16x16(x->thismb, 16, // presumably (src, src_stride, dst, dst_stride): resets the running average to the source block — confirm argument order
+ denoiser->yv12_running_avg.y_buffer + recon_yoffset,
+ denoiser->yv12_running_avg.y_stride);
+ return;
+ }
+ // Filter.
+ denoiser_filter(&denoiser->yv12_mc_running_avg,
+ &denoiser->yv12_running_avg,
+ x,
+ motion_magnitude2,
+ recon_yoffset,
+ recon_uvoffset);
+}
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
new file mode 100644
index 000000000..343531bb1
--- /dev/null
+++ b/vp8/encoder/denoising.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP8_ENCODER_DENOISING_H_
+#define VP8_ENCODER_DENOISING_H_
+
+#include "block.h"
+
+typedef struct vp8_denoiser
+{ // Frame buffers owned by the temporal denoiser (see vp8_denoiser_allocate / vp8_denoiser_free).
+ YV12_BUFFER_CONFIG yv12_running_avg; // running average frame; source of motion compensation and output of the filter
+ YV12_BUFFER_CONFIG yv12_mc_running_avg; // motion-compensated copy of the running average, rebuilt per macroblock
+} VP8_DENOISER;
+
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
+
+void vp8_denoiser_free(VP8_DENOISER *denoiser);
+
+void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
+ MACROBLOCK *x,
+ unsigned int best_sse,
+ unsigned int zero_mv_sse,
+ int recon_yoffset,
+ int recon_uvoffset);
+
+#endif // VP8_ENCODER_DENOISING_H_
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 21757f8f0..962a719c8 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1179,6 +1179,13 @@ int vp8cx_encode_inter_macroblock
else
x->encode_breakout = cpi->oxcf.encode_breakout;
+#if CONFIG_TEMPORAL_DENOISING
+ // Reset the best sse mode/mv for each macroblock.
+ x->e_mbd.best_sse_inter_mode = 0;
+ x->e_mbd.best_sse_mv.as_int = 0;
+ x->e_mbd.need_to_clamp_best_mvs = 0;
+#endif
+
if (cpi->sf.RD)
{
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index c122d038d..0145f6d20 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -186,7 +186,7 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m
#define MV_PROB_UPDATE_CORRECTION -1
-__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2])
+static void calc_prob(vp8_prob *p, const unsigned int ct[2])
{
const unsigned int tot = ct[0] + ct[1];
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index cb8fd3e89..ac83622d5 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -387,7 +387,11 @@ void vp8_end_first_pass(VP8_COMP *cpi)
output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats);
}
-static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
+static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
+ YV12_BUFFER_CONFIG * raw_buffer,
+ int * raw_motion_err,
+ YV12_BUFFER_CONFIG * recon_buffer,
+ int * best_motion_err, int recon_yoffset)
{
MACROBLOCKD * const xd = & x->e_mbd;
BLOCK *b = &x->block[0];
@@ -395,15 +399,22 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG
unsigned char *src_ptr = (*(b->base_src) + b->src);
int src_stride = b->src_stride;
+ unsigned char *raw_ptr;
+ int raw_stride = raw_buffer->y_stride;
unsigned char *ref_ptr;
int ref_stride = x->e_mbd.pre.y_stride;
+ // Set up pointers for this macro block raw buffer
+ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
+ + d->offset);
+ vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
+ (unsigned int *)(raw_motion_err));
+
// Set up pointers for this macro block recon buffer
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
-
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
-
- vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
+ vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
+ (unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -595,12 +606,18 @@ void vp8_first_pass(VP8_COMP *cpi)
MV tmp_mv = {0, 0};
int tmp_err;
int motion_error = INT_MAX;
+ int raw_motion_error = INT_MAX;
// Simple 0,0 motion with no mv overhead
- zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
+ zz_motion_search( cpi, x, cpi->last_frame_unscaled_source,
+ &raw_motion_error, lst_yv12, &motion_error,
+ recon_yoffset );
d->bmi.mv.as_mv.row = 0;
d->bmi.mv.as_mv.col = 0;
+ if (raw_motion_error < cpi->oxcf.encode_breakout)
+ goto skip_motion_search;
+
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
first_pass_motion_search(cpi, x, &best_ref_mv,
@@ -648,6 +665,7 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
}
+skip_motion_search:
/* Intra assumed best */
best_ref_mv.as_int = 0;
diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c
index 3e582e369..4c9228186 100644
--- a/vp8/encoder/lookahead.c
+++ b/vp8/encoder/lookahead.c
@@ -73,6 +73,9 @@ vp8_lookahead_init(unsigned int width,
else if(depth > MAX_LAG_BUFFERS)
depth = MAX_LAG_BUFFERS;
+ /* Keep last frame in lookahead buffer by increasing depth by 1.*/
+ depth += 1;
+
/* Align the buffer dimensions */
width = (width + 15) & ~15;
height = (height + 15) & ~15;
@@ -110,7 +113,7 @@ vp8_lookahead_push(struct lookahead_ctx *ctx,
int mb_rows = (src->y_height + 15) >> 4;
int mb_cols = (src->y_width + 15) >> 4;
- if(ctx->sz + 1 > ctx->max_sz)
+ if(ctx->sz + 2 > ctx->max_sz)
return 1;
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
@@ -177,7 +180,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
{
struct lookahead_entry* buf = NULL;
- if(ctx->sz && (drain || ctx->sz == ctx->max_sz))
+ if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1))
{
buf = pop(ctx, &ctx->read_idx);
ctx->sz--;
@@ -188,18 +191,33 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
struct lookahead_entry*
vp8_lookahead_peek(struct lookahead_ctx *ctx,
- unsigned int index)
+ unsigned int index,
+ int direction)
{
struct lookahead_entry* buf = NULL;
- assert(index < ctx->max_sz);
- if(index < ctx->sz)
+ if (direction == PEEK_FORWARD)
+ {
+ assert(index < ctx->max_sz - 1);
+ if(index < ctx->sz)
+ {
+ index += ctx->read_idx;
+ if(index >= ctx->max_sz)
+ index -= ctx->max_sz;
+ buf = ctx->buf + index;
+ }
+ }
+ else if (direction == PEEK_BACKWARD)
{
- index += ctx->read_idx;
- if(index >= ctx->max_sz)
- index -= ctx->max_sz;
+ assert(index == 1);
+
+ if(ctx->read_idx == 0)
+ index = ctx->max_sz - 1;
+ else
+ index = ctx->read_idx - index;
buf = ctx->buf + index;
}
+
return buf;
}
diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h
index 32bafcd63..cf56b75b7 100644
--- a/vp8/encoder/lookahead.h
+++ b/vp8/encoder/lookahead.h
@@ -82,6 +82,8 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
int drain);
+#define PEEK_FORWARD 1
+#define PEEK_BACKWARD -1
/**\brief Get a future source buffer to encode
*
* \param[in] ctx Pointer to the lookahead context
@@ -92,7 +94,8 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
*/
struct lookahead_entry*
vp8_lookahead_peek(struct lookahead_ctx *ctx,
- unsigned int index);
+ unsigned int index,
+ int direction);
/**\brief Get the number of frames currently in the lookahead queue
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 5ad51e846..cd62c9c17 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -144,7 +144,7 @@ extern void vp8cx_init_quantizer(VP8_COMP *cpi);
extern const int vp8cx_base_skip_false_prob[128];
// Tables relating active max Q to active min Q
-static const int kf_low_motion_minq[QINDEX_RANGE] =
+static const unsigned char kf_low_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -155,7 +155,7 @@ static const int kf_low_motion_minq[QINDEX_RANGE] =
11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16,
16,16,17,17,18,18,18,18,19,20,20,21,21,22,23,23
};
-static const int kf_high_motion_minq[QINDEX_RANGE] =
+static const unsigned char kf_high_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -166,7 +166,7 @@ static const int kf_high_motion_minq[QINDEX_RANGE] =
16,16,17,17,18,18,18,18,19,19,20,20,20,20,21,21,
21,21,22,22,23,23,24,25,25,26,26,27,28,28,29,30
};
-static const int gf_low_motion_minq[QINDEX_RANGE] =
+static const unsigned char gf_low_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,
@@ -177,7 +177,7 @@ static const int gf_low_motion_minq[QINDEX_RANGE] =
35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42,
43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58
};
-static const int gf_mid_motion_minq[QINDEX_RANGE] =
+static const unsigned char gf_mid_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
@@ -188,7 +188,7 @@ static const int gf_mid_motion_minq[QINDEX_RANGE] =
38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48,
49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
};
-static const int gf_high_motion_minq[QINDEX_RANGE] =
+static const unsigned char gf_high_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,1,1,1,1,1,2,2,2,3,3,3,4,
4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
@@ -199,7 +199,7 @@ static const int gf_high_motion_minq[QINDEX_RANGE] =
41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80
};
-static const int inter_minq[QINDEX_RANGE] =
+static const unsigned char inter_minq[QINDEX_RANGE] =
{
0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
@@ -1107,8 +1107,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
-
- vpx_free(cpi->tok);
+ vpx_free(cpi->tok);
{
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -1680,6 +1679,17 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->alt_ref_source = NULL;
cpi->is_src_frame_alt_ref = 0;
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ if (!cpi->denoiser.yv12_mc_running_avg.buffer_alloc)
+ {
+ int width = (cpi->oxcf.Width + 15) & ~15;
+ int height = (cpi->oxcf.Height + 15) & ~15;
+ vp8_denoiser_allocate(&cpi->denoiser, width, height);
+ }
+ }
+#endif
#if 0
// Experimental RD Code
@@ -2314,6 +2324,9 @@ void vp8_remove_compressor(VP8_COMP **ptr)
vp8cx_remove_encoder_threads(cpi);
#endif
+#if CONFIG_TEMPORAL_DENOISING
+ vp8_denoiser_free(&cpi->denoiser);
+#endif
dealloc_compressor_data(cpi);
vpx_free(cpi->mb.ss);
vpx_free(cpi->tok);
@@ -2920,7 +2933,6 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
(void) frame_flags;
vp8_set_quantizer(cpi, 26);
- scale_and_extend_source(cpi->un_scaled_source, cpi);
vp8_first_pass(cpi);
}
#endif
@@ -3133,7 +3145,12 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
}
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg);
+ }
+#endif
}
static void encode_frame_to_data_rate
@@ -3589,7 +3606,7 @@ static void encode_frame_to_data_rate
scale_and_extend_source(cpi->un_scaled_source, cpi);
-#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC
+#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC && !(CONFIG_TEMPORAL_DENOISING)
if (cpi->oxcf.noise_sensitivity > 0)
{
@@ -4702,7 +4719,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->source_alt_ref_pending)
{
if ((cpi->source = vp8_lookahead_peek(cpi->lookahead,
- cpi->frames_till_gf_update_due)))
+ cpi->frames_till_gf_update_due,
+ PEEK_FORWARD)))
{
cpi->alt_ref_source = cpi->source;
if (cpi->oxcf.arnr_max_frames > 0)
@@ -4724,6 +4742,15 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (!cpi->source)
{
+ /* Read last frame source if we are encoding first pass. */
+ if (cpi->pass == 1 && cm->current_video_frame > 0)
+ {
+ if((cpi->last_source = vp8_lookahead_peek(cpi->lookahead, 1,
+ PEEK_BACKWARD)) == NULL)
+ return -1;
+ }
+
+
if ((cpi->source = vp8_lookahead_pop(cpi->lookahead, flush)))
{
cm->show_frame = 1;
@@ -4743,6 +4770,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
*time_stamp = cpi->source->ts_start;
*time_end = cpi->source->ts_end;
*frame_flags = cpi->source->flags;
+
+ if (cpi->pass == 1 && cm->current_video_frame > 0)
+ {
+ cpi->last_frame_unscaled_source = &cpi->last_source->img;
+ }
}
else
{
@@ -5026,7 +5058,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
double frame_psnr;
YV12_BUFFER_CONFIG *orig = cpi->Source;
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
- YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
int y_samples = orig->y_height * orig->y_width ;
int uv_samples = orig->uv_height * orig->uv_width ;
int t_samples = y_samples + 2 * uv_samples;
@@ -5050,7 +5081,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
cpi->total_sq_error += sq_error;
cpi->total += frame_psnr;
+#if CONFIG_POSTPROC
{
+ YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
double frame_psnr2, frame_ssim2 = 0;
double weight = 0;
@@ -5101,6 +5134,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
}
}
}
+#endif
}
if (cpi->b_calculate_ssimg)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index e9e2ee52e..c7a1de8e8 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -28,6 +28,9 @@
#include "mcomp.h"
#include "vp8/common/findnearmv.h"
#include "lookahead.h"
+#if CONFIG_TEMPORAL_DENOISING
+#include "vp8/encoder/denoising.h"
+#endif
//#define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
@@ -313,10 +316,12 @@ typedef struct VP8_COMP
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
struct lookahead_entry *alt_ref_source;
+ struct lookahead_entry *last_source;
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *un_scaled_source;
YV12_BUFFER_CONFIG scaled_source;
+ YV12_BUFFER_CONFIG *last_frame_unscaled_source;
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
@@ -661,6 +666,10 @@ typedef struct VP8_COMP
int droppable;
+#if CONFIG_TEMPORAL_DENOISING
+ VP8_DENOISER denoiser;
+#endif
+
// Coding layer state variables
unsigned int current_layer;
LAYER_CONTEXT layer_context[MAX_LAYERS];
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 65e6c1294..24e041f8d 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -24,6 +24,9 @@
#include "mcomp.h"
#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
+#if CONFIG_TEMPORAL_DENOISING
+#include "denoising.h"
+#endif
extern int VP8_UVSSE(MACROBLOCK *x);
@@ -450,6 +453,48 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim,
}
#endif
+static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x)
+{
+ if (sse < x->encode_breakout)
+ {
+ // Check u and v to make sure skip is ok
+ int sse2 = 0;
+
+ sse2 = VP8_UVSSE(x);
+
+ if (sse2 * 2 < x->encode_breakout)
+ x->skip = 1;
+ else
+ x->skip = 0;
+ }
+}
+
+static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x)
+{
+ MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
+ int_mv mv = x->e_mbd.mode_info_context->mbmi.mv;
+ int this_rd;
+ /* Exit early and don't compute the distortion if this macroblock
+ * is marked inactive. */
+ if (cpi->active_map_enabled && x->active_ptr[0] == 0)
+ {
+ *sse = 0;
+ *distortion2 = 0;
+ x->skip = 1;
+ return INT_MAX;
+ }
+
+ if((this_mode != NEWMV) ||
+ !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1)
+ *distortion2 = get_inter_mbpred_error(x,
+ &cpi->fn_ptr[BLOCK_16X16],
+ sse, mv);
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2);
+
+ check_for_encode_breakout(*sse, x);
+ return this_rd;
+}
void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
@@ -476,7 +521,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int distortion2;
int bestsme = INT_MAX;
int best_mode_index = 0;
- unsigned int sse = INT_MAX, best_sse = INT_MAX;
+ unsigned int sse = INT_MAX, best_rd_sse = INT_MAX;
+#if CONFIG_TEMPORAL_DENOISING
+ unsigned int zero_mv_sse = 0, best_sse = INT_MAX;
+#endif
int_mv mvp;
@@ -488,9 +536,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int ref_frame_map[4];
int sign_bias = 0;
- int have_subp_search = cpi->sf.half_pixel_search; /* In real-time mode,
- when Speed >= 15, no sub-pixel search. */
-
#if CONFIG_MULTI_RES_ENCODING
int dissim = INT_MAX;
int parent_ref_frame = 0;
@@ -657,7 +702,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
case B_PRED:
/* Pass best so far to pick_intra4x4mby_modes to use as breakout */
- distortion2 = best_sse;
+ distortion2 = best_rd_sse;
pick_intra4x4mby_modes(x, &rate, &distortion2);
if (distortion2 == INT_MAX)
@@ -905,43 +950,38 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
x->e_mbd.mode_info_context->mbmi.mv.as_int =
mode_mv[this_mode].as_int;
-
- /* Exit early and don't compute the distortion if this macroblock
- * is marked inactive. */
- if (cpi->active_map_enabled && x->active_ptr[0] == 0)
- {
- sse = 0;
- distortion2 = 0;
- x->skip = 1;
- break;
- }
-
- if((this_mode != NEWMV) ||
- !(have_subp_search) || cpi->common.full_pixel==1)
- distortion2 = get_inter_mbpred_error(x,
- &cpi->fn_ptr[BLOCK_16X16],
- &sse, mode_mv[this_mode]);
-
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-
- if (sse < x->encode_breakout)
- {
- // Check u and v to make sure skip is ok
- int sse2 = 0;
-
- sse2 = VP8_UVSSE(x);
-
- if (sse2 * 2 < x->encode_breakout)
- x->skip = 1;
- else
- x->skip = 0;
- }
+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
break;
default:
break;
}
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ // Store for later use by denoiser.
+ if (this_mode == ZEROMV &&
+ x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ {
+ zero_mv_sse = sse;
+ }
+
+ // Store the best NEWMV in x for later use in the denoiser.
+ // We are restricted to the LAST_FRAME since the denoiser only keeps
+ // one filter state.
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ {
+ best_sse = sse;
+ x->e_mbd.best_sse_inter_mode = NEWMV;
+ x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->e_mbd.need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ }
+ }
+#endif
+
if (this_rd < best_rd || x->skip)
{
// Note index of best mode
@@ -949,7 +989,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returnrate = rate2;
*returndistortion = distortion2;
- best_sse = sse;
+ best_rd_sse = sse;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
sizeof(MB_MODE_INFO));
@@ -1011,6 +1051,43 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
cpi->error_bins[this_rdbin] ++;
}
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
+ // No best MV found.
+ x->e_mbd.best_sse_inter_mode = best_mbmode.mode;
+ x->e_mbd.best_sse_mv = best_mbmode.mv;
+ x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
+ best_sse = best_rd_sse;
+ }
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
+ recon_yoffset, recon_uvoffset);
+
+ // Reevaluate ZEROMV after denoising.
+ if (best_mbmode.ref_frame == INTRA_FRAME)
+ {
+ int this_rd = 0;
+ rate2 = 0;
+ distortion2 = 0;
+ x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
+ rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+ this_mode = ZEROMV;
+ rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
+ x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
+
+ if (this_rd < best_rd || x->skip)
+ {
+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
+ sizeof(MB_MODE_INFO));
+ }
+ }
+ }
+#endif
+
if (cpi->is_src_frame_alt_ref &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
{
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index bb2b0ca71..8f575e498 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -33,11 +33,33 @@
#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/systemdependent.h"
+#if CONFIG_TEMPORAL_DENOISING
+#include "denoising.h"
+#endif
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
+typedef struct rate_distortion_struct
+{
+ int rate2;
+ int rate_y;
+ int rate_uv;
+ int distortion2;
+ int distortion_uv;
+} RATE_DISTORTION;
+
+typedef struct best_mode_struct
+{
+ int yrd;
+ int rd;
+ int intra_rd;
+ MB_MODE_INFO mbmode;
+ union b_mode_info bmodes[16];
+ PARTITION_INFO partition;
+} BEST_MODE;
+
static const int auto_speed_thresh[17] =
{
1000,
@@ -741,7 +763,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
return INT_MAX;
*Rate = cost;
- *rate_y += tot_rate_y;
+ *rate_y = tot_rate_y;
*Distortion = distortion;
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
@@ -1327,7 +1349,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
}
}
-static __inline
+static
void vp8_cal_step_param(int sr, int *sp)
{
int step = 0;
@@ -1711,6 +1733,181 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
}
}
+static int evaluate_inter_mode_rd(int mdcounts[4],
+ RATE_DISTORTION* rd,
+ int* disable_skip,
+ VP8_COMP *cpi, MACROBLOCK *x)
+{
+ MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
+ BLOCK *b = &x->block[0];
+ MACROBLOCKD *xd = &x->e_mbd;
+ int distortion;
+ vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
+
+ if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
+ x->skip = 1;
+ }
+ else if (x->encode_breakout)
+ {
+ unsigned int sse;
+ unsigned int var;
+ int threshold = (xd->block[0].dequant[1]
+ * xd->block[0].dequant[1] >>4);
+
+ if(threshold < x->encode_breakout)
+ threshold = x->encode_breakout;
+
+ var = vp8_variance16x16
+ (*(b->base_src), b->src_stride,
+ x->e_mbd.predictor, 16, &sse);
+
+ if (sse < threshold)
+ {
+ unsigned int q2dc = xd->block[24].dequant[0];
+ /* If there is no codeable 2nd order dc
+ or a very small uniform pixel change */
+ if ((sse - var < q2dc * q2dc >>4) ||
+ (sse /2 > var && sse-var < 64))
+ {
+ // Check u and v to make sure skip is ok
+ int sse2= VP8_UVSSE(x);
+ if (sse2 * 2 < threshold)
+ {
+ x->skip = 1;
+ rd->distortion2 = sse + sse2;
+ rd->rate2 = 500;
+
+ /* for best_yrd calculation */
+ rd->rate_uv = 0;
+ rd->distortion_uv = sse2;
+
+ *disable_skip = 1;
+ return RDCOST(x->rdmult, x->rddiv, rd->rate2,
+ rd->distortion2);
+ }
+ }
+ }
+ }
+
+
+ //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code
+
+ // Add in the Mv/mode cost
+ rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
+
+ // Y cost and distortion
+ macro_block_yrd(x, &rd->rate_y, &distortion);
+ rd->rate2 += rd->rate_y;
+ rd->distortion2 += distortion;
+
+ // UV cost and distortion
+ rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
+ cpi->common.full_pixel);
+ rd->rate2 += rd->rate_uv;
+ rd->distortion2 += rd->distortion_uv;
+ return INT_MAX;
+}
+
+static int calculate_final_rd_costs(int this_rd,
+ RATE_DISTORTION* rd,
+ int* other_cost,
+ int disable_skip,
+ int uv_intra_tteob,
+ int intra_rd_penalty,
+ VP8_COMP *cpi, MACROBLOCK *x)
+{
+ MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
+ // Where skip is allowable add in the default per mb cost for the no skip case.
+ // where we then decide to skip we have to delete this and replace it with the
+ // cost of signalling a skip
+ if (cpi->common.mb_no_coeff_skip)
+ {
+ *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
+ rd->rate2 += *other_cost;
+ }
+
+ /* Estimate the reference frame signaling cost and add it
+ * to the rolling cost variable.
+ */
+ rd->rate2 +=
+ x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+
+ if (!disable_skip)
+ {
+ // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate
+ if (cpi->common.mb_no_coeff_skip)
+ {
+ int i;
+ int tteob;
+ int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
+
+ tteob = 0;
+ if(has_y2_block)
+ tteob += x->e_mbd.eobs[24];
+
+ for (i = 0; i < 16; i++)
+ tteob += (x->e_mbd.eobs[i] > has_y2_block);
+
+ if (x->e_mbd.mode_info_context->mbmi.ref_frame)
+ {
+ for (i = 16; i < 24; i++)
+ tteob += x->e_mbd.eobs[i];
+ }
+ else
+ tteob += uv_intra_tteob;
+
+ if (tteob == 0)
+ {
+ rd->rate2 -= (rd->rate_y + rd->rate_uv);
+ //for best_yrd calculation
+ rd->rate_uv = 0;
+
+ // Back out no skip flag costing and add in skip flag costing
+ if (cpi->prob_skip_false)
+ {
+ int prob_skip_cost;
+
+ prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
+ prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
+ rd->rate2 += prob_skip_cost;
+ *other_cost += prob_skip_cost;
+ }
+ }
+ }
+ // Calculate the final RD estimate for this mode
+ this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
+ if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
+ == INTRA_FRAME)
+ this_rd += intra_rd_penalty;
+ }
+ return this_rd;
+}
+
+static void update_best_mode(BEST_MODE* best_mode, int this_rd,
+ RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
+{
+ MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
+
+ other_cost +=
+ x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
+
+ /* Calculate the final y RD estimate for this mode */
+ best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
+ (rd->distortion2-rd->distortion_uv));
+
+ best_mode->rd = this_rd;
+ vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
+
+ if ((this_mode == B_PRED) || (this_mode == SPLITMV))
+ {
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
+ }
+ }
+}
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
@@ -1719,9 +1916,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
BLOCK *b = &x->block[0];
BLOCKD *d = &x->e_mbd.block[0];
MACROBLOCKD *xd = &x->e_mbd;
- union b_mode_info best_bmodes[16];
- MB_MODE_INFO best_mbmode;
- PARTITION_INFO best_partition;
int_mv best_ref_mv_sb[2];
int_mv mode_mv_sb[2][MB_MODE_COUNT];
int_mv best_ref_mv;
@@ -1729,21 +1923,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
MB_PREDICTION_MODE this_mode;
int num00;
int best_mode_index = 0;
+ BEST_MODE best_mode;
int i;
int mode_index;
int mdcounts[4];
int rate;
- int distortion;
- int best_rd = INT_MAX;
- int best_intra_rd = INT_MAX;
- int rate2, distortion2;
+ RATE_DISTORTION rd;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int uv_intra_tteob = 0;
int uv_intra_done = 0;
- int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
- int distortion_uv;
- int best_yrd = INT_MAX;
MB_PREDICTION_MODE uv_intra_mode = 0;
int_mv mvp;
@@ -1760,9 +1949,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mode_mv = mode_mv_sb[sign_bias];
best_ref_mv.as_int = 0;
+ best_mode.rd = INT_MAX;
+ best_mode.yrd = INT_MAX;
+ best_mode.intra_rd = INT_MAX;
vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
- vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
- vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
+ vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
+ vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
/* Setup search priorities */
get_reference_search_order(cpi, ref_frame_map);
@@ -1799,15 +1991,15 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
// Test best rd so far against threshold for trying this mode.
- if (best_rd <= cpi->rd_threshes[mode_index])
+ if (best_mode.rd <= cpi->rd_threshes[mode_index])
continue;
if (this_ref_frame < 0)
continue;
// These variables hold are rolling total cost and distortion for this mode
- rate2 = 0;
- distortion2 = 0;
+ rd.rate2 = 0;
+ rd.distortion2 = 0;
this_mode = vp8_mode_order[mode_index];
@@ -1907,16 +2099,17 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int tmp_rd;
// Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
- rate2 += rate;
- distortion2 += distortion;
+ int distortion;
+ tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
+ rd.rate2 += rate;
+ rd.distortion2 += distortion;
- if(tmp_rd < best_yrd)
+ if(tmp_rd < best_mode.yrd)
{
- rate2 += uv_intra_rate;
- rate_uv = uv_intra_rate_tokenonly;
- distortion2 += uv_intra_distortion;
- distortion_uv = uv_intra_distortion;
+ rd.rate2 += uv_intra_rate;
+ rd.rate_uv = uv_intra_rate_tokenonly;
+ rd.distortion2 += uv_intra_distortion;
+ rd.distortion_uv = uv_intra_distortion;
}
else
{
@@ -1930,24 +2123,25 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
int tmp_rd;
int this_rd_thresh;
+ int distortion;
this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3];
this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh;
tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- best_yrd, mdcounts,
- &rate, &rate_y, &distortion, this_rd_thresh) ;
+ best_mode.yrd, mdcounts,
+ &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
- rate2 += rate;
- distortion2 += distortion;
+ rd.rate2 += rate;
+ rd.distortion2 += distortion;
// If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV
- if (tmp_rd < best_yrd)
+ if (tmp_rd < best_mode.yrd)
{
// Now work out UV cost and add it in
- rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
- rate2 += rate_uv;
- distortion2 += distortion_uv;
+ rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
+ rd.rate2 += rd.rate_uv;
+ rd.distortion2 += rd.distortion_uv;
}
else
{
@@ -1960,18 +2154,21 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
case V_PRED:
case H_PRED:
case TM_PRED:
+ {
+ int distortion;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_build_intra_predictors_mby
(&x->e_mbd);
- macro_block_yrd(x, &rate_y, &distortion) ;
- rate2 += rate_y;
- distortion2 += distortion;
- rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
- rate2 += uv_intra_rate;
- rate_uv = uv_intra_rate_tokenonly;
- distortion2 += uv_intra_distortion;
- distortion_uv = uv_intra_distortion;
- break;
+ macro_block_yrd(x, &rd.rate_y, &distortion) ;
+ rd.rate2 += rd.rate_y;
+ rd.distortion2 += distortion;
+ rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
+ rd.rate2 += uv_intra_rate;
+ rd.rate_uv = uv_intra_rate_tokenonly;
+ rd.distortion2 += uv_intra_distortion;
+ rd.distortion_uv = uv_intra_distortion;
+ }
+ break;
case NEWMV:
{
@@ -2114,7 +2311,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
// Add the new motion vector cost to our rolling cost variable
- rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
+ rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
}
case NEARESTMV:
@@ -2136,177 +2333,57 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
continue;
vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
- vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
-
- if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
- x->skip = 1;
- }
- else if (x->encode_breakout)
- {
- unsigned int sse;
- unsigned int var;
- int threshold = (xd->block[0].dequant[1]
- * xd->block[0].dequant[1] >>4);
-
- if(threshold < x->encode_breakout)
- threshold = x->encode_breakout;
-
- var = vp8_variance16x16
- (*(b->base_src), b->src_stride,
- x->e_mbd.predictor, 16, &sse);
-
- if (sse < threshold)
- {
- unsigned int q2dc = xd->block[24].dequant[0];
- /* If theres is no codeable 2nd order dc
- or a very small uniform pixel change change */
- if ((sse - var < q2dc * q2dc >>4) ||
- (sse /2 > var && sse-var < 64))
- {
- // Check u and v to make sure skip is ok
- int sse2= VP8_UVSSE(x);
- if (sse2 * 2 < threshold)
- {
- x->skip = 1;
- distortion2 = sse + sse2;
- rate2 = 500;
-
- /* for best_yrd calculation */
- rate_uv = 0;
- distortion_uv = sse2;
-
- disable_skip = 1;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-
- break;
- }
- }
- }
- }
-
-
- //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code
-
- // Add in the Mv/mode cost
- rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
-
- // Y cost and distortion
- macro_block_yrd(x, &rate_y, &distortion);
- rate2 += rate_y;
- distortion2 += distortion;
-
- // UV cost and distortion
- rd_inter16x16_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
- rate2 += rate_uv;
- distortion2 += distortion_uv;
+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
+ &disable_skip, cpi, x);
break;
default:
break;
}
- // Where skip is allowable add in the default per mb cost for the no skip case.
- // where we then decide to skip we have to delete this and replace it with the
- // cost of signallying a skip
- if (cpi->common.mb_no_coeff_skip)
- {
- other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
- rate2 += other_cost;
- }
+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
+ disable_skip, uv_intra_tteob,
+ intra_rd_penalty, cpi, x);
- /* Estimate the reference frame signaling cost and add it
- * to the rolling cost variable.
- */
- rate2 +=
- x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
-
- if (!disable_skip)
+ // Keep record of best intra distortion
+ if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
+ (this_rd < best_mode.intra_rd) )
{
- // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate
- if (cpi->common.mb_no_coeff_skip)
- {
- int tteob;
- int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
-
- tteob = 0;
- if(has_y2_block)
- tteob += x->e_mbd.eobs[24];
-
- for (i = 0; i < 16; i++)
- tteob += (x->e_mbd.eobs[i] > has_y2_block);
-
- if (x->e_mbd.mode_info_context->mbmi.ref_frame)
- {
- for (i = 16; i < 24; i++)
- tteob += x->e_mbd.eobs[i];
- }
- else
- tteob += uv_intra_tteob;
-
- if (tteob == 0)
- {
- rate2 -= (rate_y + rate_uv);
- //for best_yrd calculation
- rate_uv = 0;
-
- // Back out no skip flag costing and add in skip flag costing
- if (cpi->prob_skip_false)
- {
- int prob_skip_cost;
-
- prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
- prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
- }
- }
- // Calculate the final RD estimate for this mode
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
- == INTRA_FRAME)
- this_rd += intra_rd_penalty;
+ best_mode.intra_rd = this_rd;
+ *returnintra = rd.distortion2 ;
}
- // Keep record of best intra distortion
- if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
- (this_rd < best_intra_rd) )
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
{
- best_intra_rd = this_rd;
- *returnintra = distortion2 ;
+ // Store the best NEWMV in x for later use in the denoiser.
+ // We are restricted to the LAST_FRAME since the denoiser only keeps
+ // one filter state.
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ {
+ x->e_mbd.best_sse_inter_mode = NEWMV;
+ x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->e_mbd.need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ }
}
+#endif
// Did this mode help.. i.i is it the new best mode
- if (this_rd < best_rd || x->skip)
+ if (this_rd < best_mode.rd || x->skip)
{
// Note index of best mode so far
best_mode_index = mode_index;
-
+ *returnrate = rd.rate2;
+ *returndistortion = rd.distortion2;
if (this_mode <= B_PRED)
{
x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
/* required for left and above block mv */
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
}
-
- other_cost +=
- x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
-
- /* Calculate the final y RD estimate for this mode */
- best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
- (distortion2-distortion_uv));
-
- *returnrate = rate2;
- *returndistortion = distortion2;
- best_rd = this_rd;
- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
- vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
-
- if ((this_mode == B_PRED) || (this_mode == SPLITMV))
- for (i = 0; i < 16; i++)
- {
- best_bmodes[i] = x->e_mbd.block[i].bmi;
- }
+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
// Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
@@ -2359,9 +2436,50 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
// Note how often each mode chosen as best
cpi->mode_chosen_counts[best_mode_index] ++;
+#if CONFIG_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity)
+ {
+ if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
+ // No best MV found.
+ x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode;
+ x->e_mbd.best_sse_mv = best_mode.mbmode.mv;
+ x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
+ }
+
+ // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used?
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0,
+ recon_yoffset, recon_uvoffset);
+ // Reevaluate ZEROMV if the current mode is INTRA.
+ if (best_mode.mbmode.ref_frame == INTRA_FRAME)
+ {
+ int this_rd = INT_MAX;
+ int disable_skip = 0;
+ int other_cost = 0;
+ vpx_memset(&rd, 0, sizeof(rd));
+ x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
+ rd.rate2 += x->ref_frame_cost[LAST_FRAME];
+ rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts);
+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
+ disable_skip, uv_intra_tteob,
+ intra_rd_penalty, cpi, x);
+ if (this_rd < best_mode.rd || x->skip)
+ {
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+ *returnrate = rd.rate2;
+ *returndistortion = rd.distortion2;
+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
+ }
+ }
+ }
+#endif
if (cpi->is_src_frame_alt_ref &&
- (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
+ (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
{
x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
@@ -2370,26 +2488,25 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
(cpi->common.mb_no_coeff_skip);
x->e_mbd.mode_info_context->mbmi.partitioning = 0;
-
return;
}
// macroblock modes
- vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
- if (best_mbmode.mode == B_PRED)
+ if (best_mode.mbmode.mode == B_PRED)
{
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
+ xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
}
- if (best_mbmode.mode == SPLITMV)
+ if (best_mode.mbmode.mode == SPLITMV)
{
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].mv.as_int = best_bmodes[i].mv.as_int;
+ xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
- vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+ vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
x->e_mbd.mode_info_context->mbmi.mv.as_int =
x->partition_info->bmi[15].mv.as_int;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 709f6e2b4..7e7def462 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -525,7 +525,8 @@ void vp8_temporal_filter_prepare_c
{
int which_buffer = start_frame - frame;
struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead,
- which_buffer);
+ which_buffer,
+ PEEK_FORWARD);
cpi->frames[frames_to_blur-1-frame] = &buf->img;
}
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 967b6026a..efe2b4826 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -482,7 +482,7 @@ void vp8_tokenize_initialize()
}
-static __inline void stuff2nd_order_b
+static void stuff2nd_order_b
(
TOKENEXTRA **tp,
ENTROPY_CONTEXT *a,
@@ -506,7 +506,7 @@ static __inline void stuff2nd_order_b
}
-static __inline void stuff1st_order_b
+static void stuff1st_order_b
(
TOKENEXTRA **tp,
ENTROPY_CONTEXT *a,
@@ -530,7 +530,7 @@ static __inline void stuff1st_order_b
*a = *l = pt;
}
-static __inline
+static
void stuff1st_order_buv
(
TOKENEXTRA **tp,
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 0aa19431c..48574f33c 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -42,7 +42,7 @@ typedef BOOL_CODER vp8_writer;
/* Both of these return bits, not scaled bits. */
-static __inline unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
+static unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
{
/* Imitate existing calculation */
@@ -53,7 +53,7 @@ static __inline unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob
/* Small functions to write explicit values and tokens, as well as
estimate their lengths. */
-static __inline void vp8_treed_write
+static void vp8_treed_write
(
vp8_writer *const w,
vp8_tree t,
@@ -72,7 +72,7 @@ static __inline void vp8_treed_write
}
while (n);
}
-static __inline void vp8_write_token
+static void vp8_write_token
(
vp8_writer *const w,
vp8_tree t,
@@ -83,7 +83,7 @@ static __inline void vp8_write_token
vp8_treed_write(w, t, p, x->value, x->Len);
}
-static __inline int vp8_treed_cost(
+static int vp8_treed_cost(
vp8_tree t,
const vp8_prob *const p,
int v,
@@ -103,7 +103,7 @@ static __inline int vp8_treed_cost(
return c;
}
-static __inline int vp8_cost_token
+static int vp8_cost_token
(
vp8_tree t,
const vp8_prob *const p,
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index f68d007c1..3403557e9 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -79,6 +79,7 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
+VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm
@@ -112,6 +113,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm
ifeq ($(CONFIG_POSTPROC),yes)
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index fa78ec31c..683194a1d 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -183,14 +183,20 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref);
RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
-
+#if CONFIG_TEMPORAL_DENOISING
+ RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 1);
+#endif
#if !(CONFIG_REALTIME_ONLY)
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
+#if !(CONFIG_TEMPORAL_DENOISING)
RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
+#endif
#else
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
+#if !(CONFIG_TEMPORAL_DENOISING)
RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0);
#endif
+#endif
RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index aad847a72..2e940d787 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -47,6 +47,8 @@ VP8_CX_SRCS-yes += encoder/firstpass.c
VP8_CX_SRCS-yes += encoder/block.h
VP8_CX_SRCS-yes += encoder/boolhuff.h
VP8_CX_SRCS-yes += encoder/bitstream.h
+VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.h
+VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.c
VP8_CX_SRCS-yes += encoder/encodeintra.h
VP8_CX_SRCS-yes += encoder/encodemb.h
VP8_CX_SRCS-yes += encoder/encodemv.h