diff options
Diffstat (limited to 'vp8')
30 files changed, 1601 insertions, 808 deletions
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index b606aaca0..919ef499a 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -37,14 +37,15 @@ static void update_mode_info_border(MODE_INFO *mi, int rows, int cols) void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) { int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); +#if CONFIG_POSTPROC vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); if (oci->post_proc_buffer_int_used) vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); +#endif vpx_free(oci->above_context); vpx_free(oci->mip); @@ -97,6 +98,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) return 1; } +#if CONFIG_POSTPROC if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); @@ -104,6 +106,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) } oci->post_proc_buffer_int_used = 0; + vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); + vpx_memset((&oci->post_proc_buffer)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); +#endif oci->mb_rows = height >> 4; oci->mb_cols = width >> 4; diff --git a/vp8/common/asm_com_offsets.c b/vp8/common/asm_com_offsets.c index 5cf151980..ae22b5f6b 100644 --- a/vp8/common/asm_com_offsets.c +++ b/vp8/common/asm_com_offsets.c @@ -15,6 +15,10 @@ #include "vpx_scale/yv12config.h" #include "vp8/common/blockd.h" +#if CONFIG_POSTPROC +#include "postproc.h" +#endif /* CONFIG_POSTPROC */ + BEGIN /* vpx_scale */ @@ -30,6 +34,11 @@ DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_b DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS); +#if CONFIG_POSTPROC +/* mfqe.c / filter_by_weight */ +DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION); +#endif /* CONFIG_POSTPROC */ + END /* 
add asserts for any offset that is not supported by assembly code */ @@ -53,3 +62,10 @@ ct_assert(B_HU_PRED, B_HU_PRED == 9); /* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */ ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32) #endif + +#if HAVE_SSE2 +#if CONFIG_POSTPROC +/* vp8_filter_by_weight16x16 and 8x8 */ +ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4) +#endif /* CONFIG_POSTPROC */ +#endif /* HAVE_SSE2 */ diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index c8d1bab7d..692f0ebd2 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -215,6 +215,12 @@ typedef struct macroblockd MODE_INFO *mode_info_context; int mode_info_stride; +#if CONFIG_TEMPORAL_DENOISING + MB_PREDICTION_MODE best_sse_inter_mode; + int_mv best_sse_mv; + unsigned char need_to_clamp_best_mvs; +#endif + FRAME_TYPE frame_type; int up_available; diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c index 60a7ff262..8235f6e9f 100644 --- a/vp8/common/loopfilter_filters.c +++ b/vp8/common/loopfilter_filters.c @@ -15,7 +15,7 @@ typedef unsigned char uc; -static __inline signed char vp8_signed_char_clamp(int t) +static signed char vp8_signed_char_clamp(int t) { t = (t < -128 ? -128 : t); t = (t > 127 ? 
127 : t); @@ -24,9 +24,9 @@ static __inline signed char vp8_signed_char_clamp(int t) /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static __inline signed char vp8_filter_mask(uc limit, uc blimit, - uc p3, uc p2, uc p1, uc p0, - uc q0, uc q1, uc q2, uc q3) +static signed char vp8_filter_mask(uc limit, uc blimit, + uc p3, uc p2, uc p1, uc p0, + uc q0, uc q1, uc q2, uc q3) { signed char mask = 0; mask |= (abs(p3 - p2) > limit); @@ -40,7 +40,7 @@ static __inline signed char vp8_filter_mask(uc limit, uc blimit, } /* is there high variance internal edge ( 11111111 yes, 00000000 no) */ -static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) +static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) { signed char hev = 0; hev |= (abs(p1 - p0) > thresh) * -1; @@ -48,7 +48,7 @@ static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) return hev; } -static __inline void vp8_filter(signed char mask, uc hev, uc *op1, +static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0, uc *oq1) { @@ -158,7 +158,7 @@ void vp8_loop_filter_vertical_edge_c while (++i < count * 8); } -static __inline void vp8_mbfilter(signed char mask, uc hev, +static void vp8_mbfilter(signed char mask, uc hev, uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) { signed char s, u; @@ -279,7 +279,7 @@ void vp8_mbloop_filter_vertical_edge_c } /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) +static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) { /* Why does this cause problems for win32? 
* error C2143: syntax error : missing ';' before 'type' @@ -289,7 +289,7 @@ static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q return mask; } -static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) +static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) { signed char vp8_filter, Filter1, Filter2; signed char p1 = (signed char) * op1 ^ 0x80; diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c new file mode 100644 index 000000000..84e336915 --- /dev/null +++ b/vp8/common/mfqe.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* MFQE: Multiframe Quality Enhancement + * In rate limited situations keyframes may cause significant visual artifacts + * commonly referred to as "popping." This file implements a postprocessing + * algorithm which blends data from the preceding frame when there is no + * motion and the q from the previous frame is lower which indicates that it is + * higher quality. 
+ */ + +#include "postproc.h" +#include "variance.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_rtcd.h" +#include "vpx_scale/yv12config.h" + +#include <limits.h> +#include <stdlib.h> + + +static void filter_by_weight(unsigned char *src, int src_stride, + unsigned char *dst, int dst_stride, + int block_size, int src_weight) +{ + int dst_weight = (1 << MFQE_PRECISION) - src_weight; + int rounding_bit = 1 << (MFQE_PRECISION - 1); + int r, c; + + for (r = 0; r < block_size; r++) + { + for (c = 0; c < block_size; c++) + { + dst[c] = (src[c] * src_weight + + dst[c] * dst_weight + + rounding_bit) >> MFQE_PRECISION; + } + src += src_stride; + dst += dst_stride; + } +} + +void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride, + unsigned char *dst, int dst_stride, + int src_weight) +{ + filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight); +} + +void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, + unsigned char *dst, int dst_stride, + int src_weight) +{ + filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight); +} + +void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride, + unsigned char *dst, int dst_stride, + int src_weight) +{ + filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight); +} + +static void apply_ifactor(unsigned char *y_src, + int y_src_stride, + unsigned char *y_dst, + int y_dst_stride, + unsigned char *u_src, + unsigned char *v_src, + int uv_src_stride, + unsigned char *u_dst, + unsigned char *v_dst, + int uv_dst_stride, + int block_size, + int src_weight) +{ + if (block_size == 16) + { + vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight); + vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight); + vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight); + } + else /* if (block_size == 8) */ + { + vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight); + 
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight); + vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight); + } +} + +static void multiframe_quality_enhance_block +( + int blksize, /* Currently only values supported are 16 and 8 */ + int qcurr, + int qprev, + unsigned char *y, + unsigned char *u, + unsigned char *v, + int y_stride, + int uv_stride, + unsigned char *yd, + unsigned char *ud, + unsigned char *vd, + int yd_stride, + int uvd_stride +) +{ + static const unsigned char VP8_ZEROS[16]= + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + }; + + int uvblksize = blksize >> 1; + int qdiff = qcurr - qprev; + + int i; + unsigned char *up; + unsigned char *udp; + unsigned char *vp; + unsigned char *vdp; + + unsigned int act, sad, thr, sse; + + if (blksize == 16) + { + act = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; + sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, INT_MAX)+128)>>8; + } + else /* if (blksize == 8) */ + { + act = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; + sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; + } + + /* thr = qdiff/8 + log2(act) + log4(qprev) */ + thr = (qdiff>>3); + while (act>>=1) thr++; + while (qprev>>=2) thr++; + + if (sad < thr) + { + int ifactor = (sad << MFQE_PRECISION) / thr; + ifactor >>= (qdiff >> 5); + + if (ifactor) + { + apply_ifactor(y, y_stride, yd, yd_stride, + u, v, uv_stride, + ud, vd, uvd_stride, + blksize, ifactor); + } + /* else implicitly copy from previous frame */ + } + else + { + if (blksize == 16) + { + vp8_copy_mem16x16(y, y_stride, yd, yd_stride); + vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride); + vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride); + } + else /* if (blksize == 8) */ + { + vp8_copy_mem8x8(y, y_stride, yd, yd_stride); + for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride) + vpx_memcpy(udp, up, uvblksize); + for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, 
vdp += uvd_stride) + vpx_memcpy(vdp, vp, uvblksize); + } + } +} + +void vp8_multiframe_quality_enhance +( + VP8_COMMON *cm +) +{ + YV12_BUFFER_CONFIG *show = cm->frame_to_show; + YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; + + FRAME_TYPE frame_type = cm->frame_type; + /* Point at base of Mb MODE_INFO list has motion vectors etc */ + const MODE_INFO *mode_info_context = cm->mi; + int mb_row; + int mb_col; + int qcurr = cm->base_qindex; + int qprev = cm->postproc_state.last_base_qindex; + + unsigned char *y_ptr, *u_ptr, *v_ptr; + unsigned char *yd_ptr, *ud_ptr, *vd_ptr; + + /* Set up the buffer pointers */ + y_ptr = show->y_buffer; + u_ptr = show->u_buffer; + v_ptr = show->v_buffer; + yd_ptr = dest->y_buffer; + ud_ptr = dest->u_buffer; + vd_ptr = dest->v_buffer; + + /* postprocess each macro block */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + /* if motion is high there will likely be no benefit */ + if (((frame_type == INTER_FRAME && + abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 && + abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) || + (frame_type == KEY_FRAME))) + { + if (mode_info_context->mbmi.mode == B_PRED || mode_info_context->mbmi.mode == SPLITMV) + { + int i, j; + for (i=0; i<2; ++i) + for (j=0; j<2; ++j) + multiframe_quality_enhance_block(8, qcurr, qprev, + y_ptr + 8*(i*show->y_stride+j), + u_ptr + 4*(i*show->uv_stride+j), + v_ptr + 4*(i*show->uv_stride+j), + show->y_stride, + show->uv_stride, + yd_ptr + 8*(i*dest->y_stride+j), + ud_ptr + 4*(i*dest->uv_stride+j), + vd_ptr + 4*(i*dest->uv_stride+j), + dest->y_stride, + dest->uv_stride); + } + else + { + multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr, + u_ptr, v_ptr, + show->y_stride, + show->uv_stride, + yd_ptr, ud_ptr, vd_ptr, + dest->y_stride, + dest->uv_stride); + } + } + else + { + vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride); + vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride); + 
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride); + } + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + yd_ptr += 16; + ud_ptr += 8; + vd_ptr += 8; + mode_info_context++; /* step to next MB */ + } + + y_ptr += show->y_stride * 16 - 16 * cm->mb_cols; + u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; + v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; + yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols; + ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; + vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; + + mode_info_context++; /* Skip border mb */ + } +} diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index eb7d5458d..4c39b49f0 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -72,7 +72,7 @@ extern "C" #include <assert.h> - static __inline void Scale2Ratio(int mode, int *hr, int *hs) + static void Scale2Ratio(int mode, int *hr, int *hs) { switch (mode) { diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 84cf3b340..7743ed5c5 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -92,11 +92,13 @@ typedef struct VP8Common int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; - YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; +#if CONFIG_POSTPROC + YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; int post_proc_buffer_int_used; +#endif FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ FRAME_TYPE frame_type; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 280ce0294..50ed54309 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -17,7 +17,6 @@ #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" #include "systemdependent.h" -#include "variance.h" #include <limits.h> #include <math.h> @@ -30,7 +29,6 @@ ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128) /* global constants */ -#define MFQE_PRECISION 4 #if CONFIG_POSTPROC_VISUALIZER static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { @@ -362,6 +360,7 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, vp8_post_proc_down_and_across(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); } +#if !(CONFIG_TEMPORAL_DENOISING) void vp8_de_noise(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, @@ -398,6 +397,7 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source, source->uv_width - 4, ppl); } +#endif double vp8_gaussian(double sigma, double mu, double x) { @@ -693,214 +693,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei } } - -static void multiframe_quality_enhance_block -( - int blksize, /* Currently only values supported are 16, 8, 4 */ - int qcurr, - int qprev, - unsigned char *y, - unsigned char *u, - unsigned char *v, - int y_stride, - int uv_stride, - unsigned char *yd, - unsigned char *ud, - unsigned char *vd, - int yd_stride, - int uvd_stride -) -{ - static const unsigned char VP8_ZEROS[16]= - { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - }; - int blksizeby2 = blksize >> 1; - int qdiff = qcurr - qprev; - - int i, j; - unsigned char *yp; - unsigned char *ydp; - unsigned char *up; - unsigned char *udp; - unsigned char *vp; - unsigned char *vdp; - - unsigned int act, sse, sad, thr; - if (blksize == 16) - { - act = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8; - sad = (vp8_sad16x16(y, y_stride, 
yd, yd_stride, INT_MAX)+128)>>8; - } - else if (blksize == 8) - { - act = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6; - sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, INT_MAX)+32)>>6; - } - else - { - act = (vp8_variance4x4(yd, yd_stride, VP8_ZEROS, 0, &sse)+8)>>4; - sad = (vp8_sad4x4(y, y_stride, yd, yd_stride, INT_MAX)+8)>>4; - } - /* thr = qdiff/8 + log2(act) + log4(qprev) */ - thr = (qdiff>>3); - while (act>>=1) thr++; - while (qprev>>=2) thr++; - if (sad < thr) - { - static const int roundoff = (1 << (MFQE_PRECISION - 1)); - int ifactor = (sad << MFQE_PRECISION) / thr; - ifactor >>= (qdiff >> 5); - // TODO: SIMD optimize this section - if (ifactor) - { - int icfactor = (1 << MFQE_PRECISION) - ifactor; - for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride) - { - for (j = 0; j < blksize; ++j) - ydp[j] = (int)((yp[j] * ifactor + ydp[j] * icfactor + roundoff) >> MFQE_PRECISION); - } - for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) - { - for (j = 0; j < blksizeby2; ++j) - udp[j] = (int)((up[j] * ifactor + udp[j] * icfactor + roundoff) >> MFQE_PRECISION); - } - for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) - { - for (j = 0; j < blksizeby2; ++j) - vdp[j] = (int)((vp[j] * ifactor + vdp[j] * icfactor + roundoff) >> MFQE_PRECISION); - } - } - } - else - { - if (blksize == 16) - { - vp8_copy_mem16x16(y, y_stride, yd, yd_stride); - vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride); - vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride); - } - else if (blksize == 8) - { - vp8_copy_mem8x8(y, y_stride, yd, yd_stride); - for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) - vpx_memcpy(udp, up, blksizeby2); - for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) - vpx_memcpy(vdp, vp, blksizeby2); - } - else - { - for (yp = y, ydp = yd, i = 0; i < blksize; ++i, yp += y_stride, ydp += yd_stride) - 
vpx_memcpy(ydp, yp, blksize); - for (up = u, udp = ud, i = 0; i < blksizeby2; ++i, up += uv_stride, udp += uvd_stride) - vpx_memcpy(udp, up, blksizeby2); - for (vp = v, vdp = vd, i = 0; i < blksizeby2; ++i, vp += uv_stride, vdp += uvd_stride) - vpx_memcpy(vdp, vp, blksizeby2); - } - } -} - -void vp8_multiframe_quality_enhance -( - VP8_COMMON *cm -) -{ - YV12_BUFFER_CONFIG *show = cm->frame_to_show; - YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; - - FRAME_TYPE frame_type = cm->frame_type; - /* Point at base of Mb MODE_INFO list has motion vectors etc */ - const MODE_INFO *mode_info_context = cm->mi; - int mb_row; - int mb_col; - int qcurr = cm->base_qindex; - int qprev = cm->postproc_state.last_base_qindex; - - unsigned char *y_ptr, *u_ptr, *v_ptr; - unsigned char *yd_ptr, *ud_ptr, *vd_ptr; - - /* Set up the buffer pointers */ - y_ptr = show->y_buffer; - u_ptr = show->u_buffer; - v_ptr = show->v_buffer; - yd_ptr = dest->y_buffer; - ud_ptr = dest->u_buffer; - vd_ptr = dest->v_buffer; - - /* postprocess each macro block */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - /* if motion is high there will likely be no benefit */ - if (((frame_type == INTER_FRAME && - abs(mode_info_context->mbmi.mv.as_mv.row) <= 10 && - abs(mode_info_context->mbmi.mv.as_mv.col) <= 10) || - (frame_type == KEY_FRAME))) - { - if (mode_info_context->mbmi.mode == B_PRED || mode_info_context->mbmi.mode == SPLITMV) - { - int i, j; - for (i=0; i<2; ++i) - for (j=0; j<2; ++j) - multiframe_quality_enhance_block(8, - qcurr, - qprev, - y_ptr + 8*(i*show->y_stride+j), - u_ptr + 4*(i*show->uv_stride+j), - v_ptr + 4*(i*show->uv_stride+j), - show->y_stride, - show->uv_stride, - yd_ptr + 8*(i*dest->y_stride+j), - ud_ptr + 4*(i*dest->uv_stride+j), - vd_ptr + 4*(i*dest->uv_stride+j), - dest->y_stride, - dest->uv_stride); - } - else - { - multiframe_quality_enhance_block(16, - qcurr, - qprev, - y_ptr, - u_ptr, - v_ptr, - show->y_stride, - 
show->uv_stride, - yd_ptr, - ud_ptr, - vd_ptr, - dest->y_stride, - dest->uv_stride); - - } - } - else - { - vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride); - vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride); - vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride); - } - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - yd_ptr += 16; - ud_ptr += 8; - vd_ptr += 8; - mode_info_context++; /* step to next MB */ - } - - y_ptr += show->y_stride * 16 - 16 * cm->mb_cols; - u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; - v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; - yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols; - ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; - vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; - - mode_info_context++; /* Skip border mb */ - } -} - +#if CONFIG_POSTPROC int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { int q = oci->filter_level * 10 / 6; @@ -923,6 +716,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; oci->postproc_state.last_base_qindex = oci->base_qindex; + oci->postproc_state.last_frame_valid = 1; return 0; } @@ -943,7 +737,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t // insure that postproc is set to all 0's so that post proc // doesn't pull random data in from edge - vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,126,(&oci->post_proc_buffer)->frame_size); + vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size); } } @@ -953,6 +747,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t #endif if ((flags & VP8D_MFQE) && + oci->postproc_state.last_frame_valid && oci->current_video_frame >= 2 && oci->base_qindex - oci->postproc_state.last_base_qindex >= 10) { @@ -992,6 +787,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, 
YV12_BUFFER_CONFIG *dest, vp8_ppflags_t vp8_yv12_copy_frame_ptr(oci->frame_to_show, &oci->post_proc_buffer); oci->postproc_state.last_base_qindex = oci->base_qindex; } + oci->postproc_state.last_frame_valid = 1; if (flags & VP8D_ADDNOISE) { @@ -1378,3 +1174,4 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t dest->uv_height = dest->y_height / 2; return 0; } +#endif diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h index 1db74379f..6ac788cbd 100644 --- a/vp8/common/postproc.h +++ b/vp8/common/postproc.h @@ -19,6 +19,7 @@ struct postproc_state int last_noise; char noise[3072]; int last_base_qindex; + int last_frame_valid; DECLARE_ALIGNED(16, char, blackclamp[16]); DECLARE_ALIGNED(16, char, whiteclamp[16]); DECLARE_ALIGNED(16, char, bothclamp[16]); @@ -40,4 +41,8 @@ void vp8_deblock(YV12_BUFFER_CONFIG *source, int q, int low_var_thresh, int flag); + +#define MFQE_PRECISION 4 + +void vp8_multiframe_quality_enhance(struct VP8Common *cm); #endif diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index ff8e30c3f..0fdb4fa00 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -166,6 +166,15 @@ if [ "$CONFIG_POSTPROC" = "yes" ]; then prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" # no asm yet + + prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" + specialize vp8_filter_by_weight16x16 sse2 + + prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" + specialize vp8_filter_by_weight8x8 sse2 + + prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" + # no asm yet fi # diff --git a/vp8/common/sad_c.c b/vp8/common/sad_c.c index f745bbd3d..6a3e889b1 100644 --- a/vp8/common/sad_c.c +++ b/vp8/common/sad_c.c 
@@ -13,7 +13,7 @@ #include "vpx_config.h" #include "vpx/vpx_integer.h" -static __inline +static unsigned int sad_mx_n_c( const unsigned char *src_ptr, int src_stride, diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm new file mode 100644 index 000000000..10d21f320 --- /dev/null +++ b/vp8/common/x86/mfqe_sse2.asm @@ -0,0 +1,281 @@ +; +; Copyright (c) 2012 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_filter_by_weight16x16_sse2 +;( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride, +; int src_weight +;) +global sym(vp8_filter_by_weight16x16_sse2) +sym(vp8_filter_by_weight16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movd xmm0, arg(4) ; src_weight + pshuflw xmm0, xmm0, 0x0 ; replicate to all low words + punpcklqdq xmm0, xmm0 ; replicate to all hi words + + movdqa xmm1, [GLOBAL(tMFQE)] + psubw xmm1, xmm0 ; dst_weight + + mov rax, arg(0) ; src + mov rsi, arg(1) ; src_stride + mov rdx, arg(2) ; dst + mov rdi, arg(3) ; dst_stride + + mov rcx, 16 ; loop count + pxor xmm6, xmm6 + +.combine + movdqa xmm2, [rax] + movdqa xmm4, [rdx] + add rax, rsi + + ; src * src_weight + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm6 + punpckhbw xmm3, xmm6 + pmullw xmm2, xmm0 + pmullw xmm3, xmm0 + + ; dst * dst_weight + movdqa xmm5, xmm4 + punpcklbw xmm4, xmm6 + punpckhbw xmm5, xmm6 + pmullw xmm4, xmm1 + pmullw xmm5, xmm1 + + ; sum, round and shift + paddw xmm2, xmm4 + paddw xmm3, xmm5 + paddw xmm2, [GLOBAL(tMFQE_round)] + paddw xmm3, [GLOBAL(tMFQE_round)] + psrlw xmm2, 4 + 
psrlw xmm3, 4 + + packuswb xmm2, xmm3 + movdqa [rdx], xmm2 + add rdx, rdi + + dec rcx + jnz .combine + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + + ret + +;void vp8_filter_by_weight8x8_sse2 +;( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride, +; int src_weight +;) +global sym(vp8_filter_by_weight8x8_sse2) +sym(vp8_filter_by_weight8x8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movd xmm0, arg(4) ; src_weight + pshuflw xmm0, xmm0, 0x0 ; replicate to all low words + punpcklqdq xmm0, xmm0 ; replicate to all hi words + + movdqa xmm1, [GLOBAL(tMFQE)] + psubw xmm1, xmm0 ; dst_weight + + mov rax, arg(0) ; src + mov rsi, arg(1) ; src_stride + mov rdx, arg(2) ; dst + mov rdi, arg(3) ; dst_stride + + mov rcx, 8 ; loop count + pxor xmm4, xmm4 + +.combine + movq xmm2, [rax] + movq xmm3, [rdx] + add rax, rsi + + ; src * src_weight + punpcklbw xmm2, xmm4 + pmullw xmm2, xmm0 + + ; dst * dst_weight + punpcklbw xmm3, xmm4 + pmullw xmm3, xmm1 + + ; sum, round and shift + paddw xmm2, xmm3 + paddw xmm2, [GLOBAL(tMFQE_round)] + psrlw xmm2, 4 + + packuswb xmm2, xmm4 + movq [rdx], xmm2 + add rdx, rdi + + dec rcx + jnz .combine + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + + ret + +;void vp8_variance_and_sad_16x16_sse2 | arg +;( +; unsigned char *src1, 0 +; int stride1, 1 +; unsigned char *src2, 2 +; int stride2, 3 +; unsigned int *variance, 4 +; unsigned int *sad, 5 +;) +global sym(vp8_variance_and_sad_16x16_sse2) +sym(vp8_variance_and_sad_16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(0) ; src1 + mov rcx, arg(1) ; stride1 + mov rdx, arg(2) ; src2 + mov rdi, arg(3) ; stride2 + + mov rsi, 16 ; block height + + ; Prep accumulator registers + pxor xmm3, xmm3 ; SAD + pxor xmm4, xmm4 ; sum of src2 + pxor xmm5, xmm5 ; sum of 
src2^2 + + ; Because we're working with the actual output frames + ; we can't depend on any kind of data alignment. +.accumulate + movdqa xmm0, [rax] ; src1 + movdqa xmm1, [rdx] ; src2 + add rax, rcx ; src1 + stride1 + add rdx, rdi ; src2 + stride2 + + ; SAD(src1, src2) + psadbw xmm0, xmm1 + paddusw xmm3, xmm0 + + ; SUM(src2) + pxor xmm2, xmm2 + psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 + paddusw xmm4, xmm2 + + ; pmaddubsw would be ideal if it took two unsigned values. instead, + ; it expects a signed and an unsigned value. so instead we zero extend + ; and operate on words. + pxor xmm2, xmm2 + movdqa xmm0, xmm1 + punpcklbw xmm0, xmm2 + punpckhbw xmm1, xmm2 + pmaddwd xmm0, xmm0 + pmaddwd xmm1, xmm1 + paddd xmm5, xmm0 + paddd xmm5, xmm1 + + sub rsi, 1 + jnz .accumulate + + ; phaddd only operates on adjacent double words. + ; Finalize SAD and store + movdqa xmm0, xmm3 + psrldq xmm0, 8 + paddusw xmm0, xmm3 + paddd xmm0, [GLOBAL(t128)] + psrld xmm0, 8 + + mov rax, arg(5) + movd [rax], xmm0 + + ; Accumulate sum of src2 + movdqa xmm0, xmm4 + psrldq xmm0, 8 + paddusw xmm0, xmm4 + ; Square src2. Ignore high value + pmuludq xmm0, xmm0 + psrld xmm0, 8 + + ; phaddw could be used to sum adjacent values but we want + ; all the values summed. 
promote to doubles, accumulate, + ; shift and sum + pxor xmm2, xmm2 + movdqa xmm1, xmm5 + punpckldq xmm1, xmm2 + punpckhdq xmm5, xmm2 + paddd xmm1, xmm5 + movdqa xmm2, xmm1 + psrldq xmm1, 8 + paddd xmm1, xmm2 + + psubd xmm1, xmm0 + + ; (variance + 128) >> 8 + paddd xmm1, [GLOBAL(t128)] + psrld xmm1, 8 + mov rax, arg(4) + + movd [rax], xmm1 + + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +t128: + ddq 128 +align 16 +tMFQE: ; 1 << MFQE_PRECISION + times 8 dw 0x10 +align 16 +tMFQE_round: ; 1 << (MFQE_PRECISION - 1) + times 8 dw 0x08 + diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index ba94c58bb..c5752ee0b 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -15,58 +15,6 @@ #include "vpx_ports/mem.h" #include "detokenize.h" -#define BOOL_DATA unsigned char - -#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES -DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) = -{ - 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, - 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, - 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, - 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X -}; -#define EOB_CONTEXT_NODE 0 -#define ZERO_CONTEXT_NODE 1 -#define ONE_CONTEXT_NODE 2 -#define LOW_VAL_CONTEXT_NODE 3 -#define TWO_CONTEXT_NODE 4 -#define THREE_CONTEXT_NODE 5 -#define HIGH_LOW_CONTEXT_NODE 6 -#define CAT_ONE_CONTEXT_NODE 7 -#define CAT_THREEFOUR_CONTEXT_NODE 8 -#define CAT_THREE_CONTEXT_NODE 9 -#define CAT_FIVE_CONTEXT_NODE 10 - -#define CAT1_MIN_VAL 5 -#define CAT2_MIN_VAL 7 -#define CAT3_MIN_VAL 11 -#define CAT4_MIN_VAL 19 -#define CAT5_MIN_VAL 35 -#define CAT6_MIN_VAL 67 - -#define CAT1_PROB0 159 -#define CAT2_PROB0 145 -#define CAT2_PROB1 165 - -#define CAT3_PROB0 140 -#define CAT3_PROB1 148 -#define CAT3_PROB2 173 - -#define CAT4_PROB0 135 -#define CAT4_PROB1 140 -#define CAT4_PROB2 155 -#define CAT4_PROB3 176 - -#define CAT5_PROB0 130 -#define CAT5_PROB1 134 -#define CAT5_PROB2 141 
-#define CAT5_PROB3 157 -#define CAT5_PROB4 180 - -static const unsigned char cat6_prob[12] = -{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 }; - - void vp8_reset_mb_tokens_context(MACROBLOCKD *x) { /* Clear entropy contexts for Y2 blocks */ @@ -83,302 +31,216 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x) } } -DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); -#define FILL \ - if(count < 0) \ - VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); +/* + ------------------------------------------------------------------------------ + Residual decoding (Paragraph 13.2 / 13.3) +*/ +static const uint8_t kBands[16 + 1] = { + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, + 0 /* extra entry as sentinel */ +}; -#define NORMALIZE \ - /*if(range < 0x80)*/ \ - { \ - shift = vp8_norm[range]; \ - range <<= shift; \ - value <<= shift; \ - count -= shift; \ - } +static const uint8_t kCat3[] = { 173, 148, 140, 0 }; +static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; +static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; +static const uint8_t kCat6[] = + { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; +static const uint8_t kZigzag[16] = { + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; -#define DECODE_AND_APPLYSIGN(value_to_sign) \ - split = (range + 1) >> 1; \ - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ - FILL \ - if ( value < bigsplit ) \ - { \ - range = split; \ - v= value_to_sign; \ - } \ - else \ - { \ - range = range-split; \ - value = value-bigsplit; \ - v = -value_to_sign; \ - } \ - range +=range; \ - value +=value; \ - count--; +#define VP8GetBit vp8dx_decode_bool +#define NUM_PROBAS 11 +#define NUM_CTX 3 -#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \ - { \ - split = 1 + ((( probability*(range-1) ) )>> 8); \ - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ - FILL \ - if ( value < 
bigsplit ) \ - { \ - range = split; \ - NORMALIZE \ - goto branch; \ - } \ - value -= bigsplit; \ - range = range - split; \ - NORMALIZE \ - } +typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting -#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \ - { \ - split = 1 + ((( probability*(range-1) ) ) >> 8); \ - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ - FILL \ - if ( value < bigsplit ) \ - { \ - range = split; \ - NORMALIZE \ - Prob = coef_probs; \ - if(c<15) {\ - ++c; \ - Prob += coef_bands_x[c]; \ - goto branch; \ - } goto BLOCK_FINISHED; /*for malformed input */\ - } \ - value -= bigsplit; \ - range = range - split; \ - NORMALIZE \ - } +static int GetSigned(BOOL_DECODER *br, int value_to_sign) +{ + int split = (br->range + 1) >> 1; + VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); + int v; -#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \ - DECODE_AND_APPLYSIGN(val) \ - Prob = coef_probs + (ENTROPY_NODES*2); \ - if(c < 15){\ - qcoeff_ptr [ scan[c] ] = (int16_t) v; \ - ++c; \ - goto DO_WHILE; }\ - qcoeff_ptr [ 15 ] = (int16_t) v; \ - goto BLOCK_FINISHED; + if(br->count < 0) + vp8dx_bool_decoder_fill(br); + if ( br->value < bigsplit ) + { + br->range = split; + v= value_to_sign; + } + else + { + br->range = br->range-split; + br->value = br->value-bigsplit; + v = -value_to_sign; + } + br->range +=br->range; + br->value +=br->value; + br->count--; -#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\ - split = 1 + (((range-1) * prob) >> 8); \ - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ - FILL \ - if(value >= bigsplit)\ - {\ - range = range-split;\ - value = value-bigsplit;\ - val += ((uint16_t)1<<bits_count);\ - }\ - else\ - {\ - range = split;\ - }\ - NORMALIZE + return v; +} +/* + Returns the position of the last non-zero coeff plus one + (and 0 if there's no coeff at all) +*/ +static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob, + int ctx, int n, int16_t* out) 
+{ + const uint8_t* p = prob[n][ctx]; + if (!VP8GetBit(br, p[0])) + { /* first EOB is more a 'CBP' bit. */ + return 0; + } + while (1) + { + ++n; + if (!VP8GetBit(br, p[1])) + { + p = prob[kBands[n]][0]; + } + else + { /* non zero coeff */ + int v, j; + if (!VP8GetBit(br, p[2])) + { + p = prob[kBands[n]][1]; + v = 1; + } + else + { + if (!VP8GetBit(br, p[3])) + { + if (!VP8GetBit(br, p[4])) + { + v = 2; + } + else + { + v = 3 + VP8GetBit(br, p[5]); + } + } + else + { + if (!VP8GetBit(br, p[6])) + { + if (!VP8GetBit(br, p[7])) + { + v = 5 + VP8GetBit(br, 159); + } else + { + v = 7 + 2 * VP8GetBit(br, 165); + v += VP8GetBit(br, 145); + } + } + else + { + const uint8_t* tab; + const int bit1 = VP8GetBit(br, p[8]); + const int bit0 = VP8GetBit(br, p[9 + bit1]); + const int cat = 2 * bit1 + bit0; + v = 0; + for (tab = kCat3456[cat]; *tab; ++tab) + { + v += v + VP8GetBit(br, *tab); + } + v += 3 + (8 << cat); + } + } + p = prob[kBands[n]][2]; + } + j = kZigzag[n - 1]; + + out[j] = GetSigned(br, v); + + if (n == 16 || !VP8GetBit(br, p[0])) + { /* EOB */ + return n; + } + } + if (n == 16) + { + return 16; + } + } +} int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; - const FRAME_CONTEXT * const fc = &dx->common.fc; - BOOL_DECODER *bc = x->current_bc; - + const FRAME_CONTEXT * const fc = &dx->common.fc; char *eobs = x->eobs; - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; int i; - + int nonzeros; int eobtotal = 0; - register int count; - - const BOOL_DATA *bufptr; - const BOOL_DATA *bufend; - register unsigned int range; - VP8_BD_VALUE value; - const int *scan; - register unsigned int shift; - unsigned int split; - VP8_BD_VALUE bigsplit; short *qcoeff_ptr; + ProbaArray coef_probs; + ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); + ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); + ENTROPY_CONTEXT *a; + ENTROPY_CONTEXT *l; + 
int skip_dc = 0; - const vp8_prob *coef_probs; - int stop; - int val, bits_count; - int c; - int v; - const vp8_prob *Prob; - int start_coeff; - - - i = 0; - stop = 16; - - scan = vp8_default_zig_zag1d; qcoeff_ptr = &x->qcoeff[0]; - coef_probs = fc->coef_probs [3] [ 0 ] [0]; if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { - i = 24; - stop = 24; - qcoeff_ptr += 24*16; - eobtotal -= 16; - coef_probs = fc->coef_probs [1] [ 0 ] [0]; - } - - bufend = bc->user_buffer_end; - bufptr = bc->user_buffer; - value = bc->value; - count = bc->count; - range = bc->range; - - start_coeff = 0; - -BLOCK_LOOP: - a = A + vp8_block2above[i]; - l = L + vp8_block2left[i]; - - c = start_coeff; + a = a_ctx + 8; + l = l_ctx + 8; - VP8_COMBINEENTROPYCONTEXTS(v, *a, *l); + coef_probs = fc->coef_probs [1]; - Prob = coef_probs; - Prob += v * ENTROPY_NODES; - *a = *l = 0; + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16); + *a = *l = (nonzeros > 0); -DO_WHILE: - Prob += coef_bands_x[c]; - DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED); - *a = *l = 1; + eobs[24] = nonzeros; + eobtotal += nonzeros - 16; -CHECK_0_: - DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], - LOW_VAL_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], - HIGH_LOW_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], - CAT_THREEFOUR_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], - CAT_FIVE_CONTEXT_NODE_0_); - - val = CAT6_MIN_VAL; - bits_count = 10; - - do + coef_probs = fc->coef_probs [0]; + skip_dc = 1; + } + else { - DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count); - bits_count -- ; + coef_probs = fc->coef_probs [3]; + skip_dc = 0; } - while (bits_count >= 0); - 
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_FIVE_CONTEXT_NODE_0_: - val = CAT5_MIN_VAL; - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_THREEFOUR_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], - CAT_THREE_CONTEXT_NODE_0_); - val = CAT4_MIN_VAL; - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_THREE_CONTEXT_NODE_0_: - val = CAT3_MIN_VAL; - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -HIGH_LOW_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], - CAT_ONE_CONTEXT_NODE_0_); - - val = CAT2_MIN_VAL; - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_ONE_CONTEXT_NODE_0_: - val = CAT1_MIN_VAL; - DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -LOW_VAL_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4); - -THREE_CONTEXT_NODE_0_: - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3); - -TWO_CONTEXT_NODE_0_: - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2); - -ONE_CONTEXT_NODE_0_: - DECODE_AND_APPLYSIGN(1); - Prob = coef_probs + ENTROPY_NODES; - - if (c < 15) + for (i = 0; i < 16; ++i) { - qcoeff_ptr [ scan[c] ] = (int16_t) v; - ++c; - 
goto DO_WHILE; - } + a = a_ctx + (i&3); + l = l_ctx + ((i&0xc)>>2); - qcoeff_ptr [ 15 ] = (int16_t) v; -BLOCK_FINISHED: - eobs[i] = c; - eobtotal += c; - qcoeff_ptr += 16; + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr); + *a = *l = (nonzeros > 0); - i++; + nonzeros += skip_dc; + eobs[i] = nonzeros; + eobtotal += nonzeros; + qcoeff_ptr += 16; + } - if (i < stop) - goto BLOCK_LOOP; + coef_probs = fc->coef_probs [2]; - if (i == 25) + a_ctx += 4; + l_ctx += 4; + for (i = 16; i < 24; ++i) { - start_coeff = 1; - i = 0; - stop = 16; - coef_probs = fc->coef_probs [0] [ 0 ] [0]; - qcoeff_ptr -= (24*16 + 16); - goto BLOCK_LOOP; - } + a = a_ctx + ((i > 19)<<1) + (i&1); + l = l_ctx + ((i > 19)<<1) + ((i&3)>1); - if (i == 16) - { - start_coeff = 0; - coef_probs = fc->coef_probs [2] [ 0 ] [0]; - stop = 24; - goto BLOCK_LOOP; + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr); + *a = *l = (nonzeros > 0); + + eobs[i] = nonzeros; + eobtotal += nonzeros; + qcoeff_ptr += 16; } - FILL - bc->user_buffer = bufptr; - bc->value = value; - bc->count = count; - bc->range = range; return eobtotal; - } + diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c new file mode 100644 index 000000000..d487065c0 --- /dev/null +++ b/vp8/encoder/denoising.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "denoising.h" + +#include "vp8/common/reconinter.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_rtcd.h" + +const unsigned int NOISE_MOTION_THRESHOLD = 20*20; +const unsigned int NOISE_DIFF2_THRESHOLD = 75; +// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. +const unsigned int SSE_DIFF_THRESHOLD = 16*16*20; +const unsigned int SSE_THRESHOLD = 16*16*40; + +static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8) +{ + return (uint8_t)( + (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8 + (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8 + >> 8); +} + +static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src, + YV12_BUFFER_CONFIG* dst, + MACROBLOCK* x, + unsigned int best_sse, + unsigned int zero_mv_sse, + int recon_yoffset, + int recon_uvoffset) +{ + MACROBLOCKD filter_xd = x->e_mbd; + int mv_col; + int mv_row; + int sse_diff = zero_mv_sse - best_sse; + // Compensate the running average. + filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset; + filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset; + filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset; + // Write the compensated running average to the destination buffer. + filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset; + filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset; + filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset; + // Use the best MV for the compensation. 
+ filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; + filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode; + filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv; + filter_xd.mode_info_context->mbmi.need_to_clamp_mvs = + filter_xd.need_to_clamp_best_mvs; + mv_col = filter_xd.best_sse_mv.as_mv.col; + mv_row = filter_xd.best_sse_mv.as_mv.row; + if (filter_xd.mode_info_context->mbmi.mode <= B_PRED || + (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD && + sse_diff < SSE_DIFF_THRESHOLD)) + { + // Handle intra blocks as referring to last frame with zero motion and + // let the absolute pixel difference affect the filter factor. + // Also consider small amount of motion as being random walk due to noise, + // if it doesn't mean that we get a much bigger error. + // Note that any changes to the mode info only affects the denoising. + filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME; + filter_xd.mode_info_context->mbmi.mode = ZEROMV; + filter_xd.mode_info_context->mbmi.mv.as_int = 0; + x->e_mbd.best_sse_inter_mode = ZEROMV; + x->e_mbd.best_sse_mv.as_int = 0; + best_sse = zero_mv_sse; + } + if (!x->skip) + { + vp8_build_inter_predictors_mb(&filter_xd); + } + else + { + vp8_build_inter16x16_predictors_mb(&filter_xd, + filter_xd.dst.y_buffer, + filter_xd.dst.u_buffer, + filter_xd.dst.v_buffer, + filter_xd.dst.y_stride, + filter_xd.dst.uv_stride); + } + return best_sse; +} + +static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg, + YV12_BUFFER_CONFIG* running_avg, + MACROBLOCK* signal, + unsigned int motion_magnitude2, + int y_offset, + int uv_offset) +{ + unsigned char* sig = signal->thismb; + int sig_stride = 16; + unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset; + int mc_avg_y_stride = mc_running_avg->y_stride; + unsigned char* running_avg_y = running_avg->y_buffer + y_offset; + int avg_y_stride = running_avg->y_stride; + int r, c; + for (r = 0; r < 16; r++) + { + for (c = 0; c < 16; c++) + { 
+ int diff; + int absdiff = 0; + unsigned int filter_coefficient; + absdiff = sig[c] - mc_running_avg_y[c]; + absdiff = absdiff > 0 ? absdiff : -absdiff; + assert(absdiff >= 0 && absdiff < 256); + filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); + // Allow some additional filtering of static blocks, or blocks with very + // small motion vectors. + filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3)); + filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient; + + running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient); + diff = sig[c] - running_avg_y[c]; + + if (diff * diff < NOISE_DIFF2_THRESHOLD) + { + // Replace with mean to suppress the noise. + sig[c] = running_avg_y[c]; + } + else + { + // Replace the filter state with the signal since the change in this + // pixel isn't classified as noise. + running_avg_y[c] = sig[c]; + } + } + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } +} + +int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height) +{ + assert(denoiser); + denoiser->yv12_running_avg.flags = 0; + if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width, + height, VP8BORDERINPIXELS) < 0) + { + vp8_denoiser_free(denoiser); + return 1; + } + denoiser->yv12_mc_running_avg.flags = 0; + if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, + height, VP8BORDERINPIXELS) < 0) + { + vp8_denoiser_free(denoiser); + return 1; + } + vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0, + denoiser->yv12_running_avg.frame_size); + vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, + denoiser->yv12_mc_running_avg.frame_size); + return 0; +} + +void vp8_denoiser_free(VP8_DENOISER *denoiser) +{ + assert(denoiser); + vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg); + vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); +} + +void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, + 
MACROBLOCK *x, + unsigned int best_sse, + unsigned int zero_mv_sse, + int recon_yoffset, + int recon_uvoffset) { + int mv_row; + int mv_col; + unsigned int motion_magnitude2; + // Motion compensate the running average. + best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg, + &denoiser->yv12_mc_running_avg, + x, + best_sse, + zero_mv_sse, + recon_yoffset, + recon_uvoffset); + + mv_row = x->e_mbd.best_sse_mv.as_mv.row; + mv_col = x->e_mbd.best_sse_mv.as_mv.col; + motion_magnitude2 = mv_row*mv_row + mv_col*mv_col; + if (best_sse > SSE_THRESHOLD || + motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD) + { + // No filtering of this block since it differs too much from the predictor, + // or the motion vector magnitude is considered too big. + vp8_copy_mem16x16(x->thismb, 16, + denoiser->yv12_running_avg.y_buffer + recon_yoffset, + denoiser->yv12_running_avg.y_stride); + return; + } + // Filter. + denoiser_filter(&denoiser->yv12_mc_running_avg, + &denoiser->yv12_running_avg, + x, + motion_magnitude2, + recon_yoffset, + recon_uvoffset); +} diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h new file mode 100644 index 000000000..343531bb1 --- /dev/null +++ b/vp8/encoder/denoising.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP8_ENCODER_DENOISING_H_ +#define VP8_ENCODER_DENOISING_H_ + +#include "block.h" + +typedef struct vp8_denoiser +{ + YV12_BUFFER_CONFIG yv12_running_avg; + YV12_BUFFER_CONFIG yv12_mc_running_avg; +} VP8_DENOISER; + +int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height); + +void vp8_denoiser_free(VP8_DENOISER *denoiser); + +void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, + MACROBLOCK *x, + unsigned int best_sse, + unsigned int zero_mv_sse, + int recon_yoffset, + int recon_uvoffset); + +#endif // VP8_ENCODER_DENOISING_H_ diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 21757f8f0..962a719c8 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1179,6 +1179,13 @@ int vp8cx_encode_inter_macroblock else x->encode_breakout = cpi->oxcf.encode_breakout; +#if CONFIG_TEMPORAL_DENOISING + // Reset the best sse mode/mv for each macroblock. + x->e_mbd.best_sse_inter_mode = 0; + x->e_mbd.best_sse_mv.as_int = 0; + x->e_mbd.need_to_clamp_best_mvs = 0; +#endif + if (cpi->sf.RD) { int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index c122d038d..0145f6d20 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -186,7 +186,7 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int m #define MV_PROB_UPDATE_CORRECTION -1 -__inline static void calc_prob(vp8_prob *p, const unsigned int ct[2]) +static void calc_prob(vp8_prob *p, const unsigned int ct[2]) { const unsigned int tot = ct[0] + ct[1]; diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index cb8fd3e89..ac83622d5 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -387,7 +387,11 @@ void vp8_end_first_pass(VP8_COMP *cpi) output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats); } -static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int 
recon_yoffset ) +static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, + YV12_BUFFER_CONFIG * raw_buffer, + int * raw_motion_err, + YV12_BUFFER_CONFIG * recon_buffer, + int * best_motion_err, int recon_yoffset) { MACROBLOCKD * const xd = & x->e_mbd; BLOCK *b = &x->block[0]; @@ -395,15 +399,22 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG unsigned char *src_ptr = (*(b->base_src) + b->src); int src_stride = b->src_stride; + unsigned char *raw_ptr; + int raw_stride = raw_buffer->y_stride; unsigned char *ref_ptr; int ref_stride = x->e_mbd.pre.y_stride; + // Set up pointers for this macro block raw buffer + raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + + d->offset); + vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride, + (unsigned int *)(raw_motion_err)); + // Set up pointers for this macro block recon buffer xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset ); - - vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); + vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride, + (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, @@ -595,12 +606,18 @@ void vp8_first_pass(VP8_COMP *cpi) MV tmp_mv = {0, 0}; int tmp_err; int motion_error = INT_MAX; + int raw_motion_error = INT_MAX; // Simple 0,0 motion with no mv overhead - zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset ); + zz_motion_search( cpi, x, cpi->last_frame_unscaled_source, + &raw_motion_error, lst_yv12, &motion_error, + recon_yoffset ); d->bmi.mv.as_mv.row = 0; d->bmi.mv.as_mv.col = 0; + if (raw_motion_error < cpi->oxcf.encode_breakout) + goto skip_motion_search; + // Test last reference frame using the previous best mv as the // starting point (best reference) for the search first_pass_motion_search(cpi, x, &best_ref_mv, @@ -648,6 +665,7 @@ void vp8_first_pass(VP8_COMP 
*cpi) xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } +skip_motion_search: /* Intra assumed best */ best_ref_mv.as_int = 0; diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c index 3e582e369..4c9228186 100644 --- a/vp8/encoder/lookahead.c +++ b/vp8/encoder/lookahead.c @@ -73,6 +73,9 @@ vp8_lookahead_init(unsigned int width, else if(depth > MAX_LAG_BUFFERS) depth = MAX_LAG_BUFFERS; + /* Keep last frame in lookahead buffer by increasing depth by 1.*/ + depth += 1; + /* Align the buffer dimensions */ width = (width + 15) & ~15; height = (height + 15) & ~15; @@ -110,7 +113,7 @@ vp8_lookahead_push(struct lookahead_ctx *ctx, int mb_rows = (src->y_height + 15) >> 4; int mb_cols = (src->y_width + 15) >> 4; - if(ctx->sz + 1 > ctx->max_sz) + if(ctx->sz + 2 > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -177,7 +180,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, { struct lookahead_entry* buf = NULL; - if(ctx->sz && (drain || ctx->sz == ctx->max_sz)) + if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; @@ -188,18 +191,33 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, struct lookahead_entry* vp8_lookahead_peek(struct lookahead_ctx *ctx, - unsigned int index) + unsigned int index, + int direction) { struct lookahead_entry* buf = NULL; - assert(index < ctx->max_sz); - if(index < ctx->sz) + if (direction == PEEK_FORWARD) + { + assert(index < ctx->max_sz - 1); + if(index < ctx->sz) + { + index += ctx->read_idx; + if(index >= ctx->max_sz) + index -= ctx->max_sz; + buf = ctx->buf + index; + } + } + else if (direction == PEEK_BACKWARD) { - index += ctx->read_idx; - if(index >= ctx->max_sz) - index -= ctx->max_sz; + assert(index == 1); + + if(ctx->read_idx == 0) + index = ctx->max_sz - 1; + else + index = ctx->read_idx - index; buf = ctx->buf + index; } + return buf; } diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h index 32bafcd63..cf56b75b7 100644 --- 
a/vp8/encoder/lookahead.h +++ b/vp8/encoder/lookahead.h @@ -82,6 +82,8 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, int drain); +#define PEEK_FORWARD 1 +#define PEEK_BACKWARD -1 /**\brief Get a future source buffer to encode * * \param[in] ctx Pointer to the lookahead context @@ -92,7 +94,8 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx, */ struct lookahead_entry* vp8_lookahead_peek(struct lookahead_ctx *ctx, - unsigned int index); + unsigned int index, + int direction); /**\brief Get the number of frames currently in the lookahead queue diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 5ad51e846..cd62c9c17 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -144,7 +144,7 @@ extern void vp8cx_init_quantizer(VP8_COMP *cpi); extern const int vp8cx_base_skip_false_prob[128]; // Tables relating active max Q to active min Q -static const int kf_low_motion_minq[QINDEX_RANGE] = +static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -155,7 +155,7 @@ static const int kf_low_motion_minq[QINDEX_RANGE] = 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16, 16,16,17,17,18,18,18,18,19,20,20,21,21,22,23,23 }; -static const int kf_high_motion_minq[QINDEX_RANGE] = +static const unsigned char kf_high_motion_minq[QINDEX_RANGE] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -166,7 +166,7 @@ static const int kf_high_motion_minq[QINDEX_RANGE] = 16,16,17,17,18,18,18,18,19,19,20,20,20,20,21,21, 21,21,22,22,23,23,24,25,25,26,26,27,28,28,29,30 }; -static const int gf_low_motion_minq[QINDEX_RANGE] = +static const unsigned char gf_low_motion_minq[QINDEX_RANGE] = { 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6, @@ -177,7 +177,7 @@ static const int gf_low_motion_minq[QINDEX_RANGE] = 35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42, 43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58 }; -static const int gf_mid_motion_minq[QINDEX_RANGE] = +static 
const unsigned char gf_mid_motion_minq[QINDEX_RANGE] = { 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4, 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, @@ -188,7 +188,7 @@ static const int gf_mid_motion_minq[QINDEX_RANGE] = 38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48, 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64 }; -static const int gf_high_motion_minq[QINDEX_RANGE] = +static const unsigned char gf_high_motion_minq[QINDEX_RANGE] = { 0,0,0,0,1,1,1,1,1,2,2,2,3,3,3,4, 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, @@ -199,7 +199,7 @@ static const int gf_high_motion_minq[QINDEX_RANGE] = 41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54, 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80 }; -static const int inter_minq[QINDEX_RANGE] = +static const unsigned char inter_minq[QINDEX_RANGE] = { 0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20, @@ -1107,8 +1107,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); - - vpx_free(cpi->tok); + vpx_free(cpi->tok); { #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING @@ -1680,6 +1679,17 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->alt_ref_source = NULL; cpi->is_src_frame_alt_ref = 0; +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + if (!cpi->denoiser.yv12_mc_running_avg.buffer_alloc) + { + int width = (cpi->oxcf.Width + 15) & ~15; + int height = (cpi->oxcf.Height + 15) & ~15; + vp8_denoiser_allocate(&cpi->denoiser, width, height); + } + } +#endif #if 0 // Experimental RD Code @@ -2314,6 +2324,9 @@ void vp8_remove_compressor(VP8_COMP **ptr) vp8cx_remove_encoder_threads(cpi); #endif +#if CONFIG_TEMPORAL_DENOISING + vp8_denoiser_free(&cpi->denoiser); +#endif dealloc_compressor_data(cpi); vpx_free(cpi->mb.ss); vpx_free(cpi->tok); @@ -2920,7 +2933,6 @@ static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, (void) frame_flags; vp8_set_quantizer(cpi, 26); - 
scale_and_extend_source(cpi->un_scaled_source, cpi); vp8_first_pass(cpi); } #endif @@ -3133,7 +3145,12 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) } vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg); + } +#endif } static void encode_frame_to_data_rate @@ -3589,7 +3606,7 @@ static void encode_frame_to_data_rate scale_and_extend_source(cpi->un_scaled_source, cpi); -#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC +#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC && !(CONFIG_TEMPORAL_DENOISING) if (cpi->oxcf.noise_sensitivity > 0) { @@ -4702,7 +4719,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->source_alt_ref_pending) { if ((cpi->source = vp8_lookahead_peek(cpi->lookahead, - cpi->frames_till_gf_update_due))) + cpi->frames_till_gf_update_due, + PEEK_FORWARD))) { cpi->alt_ref_source = cpi->source; if (cpi->oxcf.arnr_max_frames > 0) @@ -4724,6 +4742,15 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (!cpi->source) { + /* Read last frame source if we are encoding first pass. 
*/ + if (cpi->pass == 1 && cm->current_video_frame > 0) + { + if((cpi->last_source = vp8_lookahead_peek(cpi->lookahead, 1, + PEEK_BACKWARD)) == NULL) + return -1; + } + + if ((cpi->source = vp8_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; @@ -4743,6 +4770,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; + + if (cpi->pass == 1 && cm->current_video_frame > 0) + { + cpi->last_frame_unscaled_source = &cpi->last_source->img; + } } else { @@ -5026,7 +5058,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l double frame_psnr; YV12_BUFFER_CONFIG *orig = cpi->Source; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; int y_samples = orig->y_height * orig->y_width ; int uv_samples = orig->uv_height * orig->uv_width ; int t_samples = y_samples + 2 * uv_samples; @@ -5050,7 +5081,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve); cpi->total_sq_error += sq_error; cpi->total += frame_psnr; +#if CONFIG_POSTPROC { + YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; double frame_psnr2, frame_ssim2 = 0; double weight = 0; @@ -5101,6 +5134,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l } } } +#endif } if (cpi->b_calculate_ssimg) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index e9e2ee52e..c7a1de8e8 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -28,6 +28,9 @@ #include "mcomp.h" #include "vp8/common/findnearmv.h" #include "lookahead.h" +#if CONFIG_TEMPORAL_DENOISING +#include "vp8/encoder/denoising.h" +#endif //#define SPEEDSTATS 1 #define MIN_GF_INTERVAL 4 @@ -313,10 +316,12 @@ typedef struct VP8_COMP struct lookahead_ctx *lookahead; struct lookahead_entry *source; struct 
lookahead_entry *alt_ref_source; + struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *last_frame_unscaled_source; int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref int source_alt_ref_active; // an alt ref frame has been encoded and is usable @@ -661,6 +666,10 @@ typedef struct VP8_COMP int droppable; +#if CONFIG_TEMPORAL_DENOISING + VP8_DENOISER denoiser; +#endif + // Coding layer state variables unsigned int current_layer; LAYER_CONTEXT layer_context[MAX_LAYERS]; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 65e6c1294..24e041f8d 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -24,6 +24,9 @@ #include "mcomp.h" #include "rdopt.h" #include "vpx_mem/vpx_mem.h" +#if CONFIG_TEMPORAL_DENOISING +#include "denoising.h" +#endif extern int VP8_UVSSE(MACROBLOCK *x); @@ -450,6 +453,48 @@ void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, } #endif +static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x) +{ + if (sse < x->encode_breakout) + { + // Check u and v to make sure skip is ok + int sse2 = 0; + + sse2 = VP8_UVSSE(x); + + if (sse2 * 2 < x->encode_breakout) + x->skip = 1; + else + x->skip = 0; + } +} + +static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, VP8_COMP *cpi, MACROBLOCK *x) +{ + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; + int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; + int this_rd; + /* Exit early and don't compute the distortion if this macroblock + * is marked inactive. 
*/ + if (cpi->active_map_enabled && x->active_ptr[0] == 0) + { + *sse = 0; + *distortion2 = 0; + x->skip = 1; + return INT_MAX; + } + + if((this_mode != NEWMV) || + !(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1) + *distortion2 = get_inter_mbpred_error(x, + &cpi->fn_ptr[BLOCK_16X16], + sse, mv); + + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); + + check_for_encode_breakout(*sse, x); + return this_rd; +} void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, @@ -476,7 +521,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion2; int bestsme = INT_MAX; int best_mode_index = 0; - unsigned int sse = INT_MAX, best_sse = INT_MAX; + unsigned int sse = INT_MAX, best_rd_sse = INT_MAX; +#if CONFIG_TEMPORAL_DENOISING + unsigned int zero_mv_sse = 0, best_sse = INT_MAX; +#endif int_mv mvp; @@ -488,9 +536,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int ref_frame_map[4]; int sign_bias = 0; - int have_subp_search = cpi->sf.half_pixel_search; /* In real-time mode, - when Speed >= 15, no sub-pixel search. */ - #if CONFIG_MULTI_RES_ENCODING int dissim = INT_MAX; int parent_ref_frame = 0; @@ -657,7 +702,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { case B_PRED: /* Pass best so far to pick_intra4x4mby_modes to use as breakout */ - distortion2 = best_sse; + distortion2 = best_rd_sse; pick_intra4x4mby_modes(x, &rate, &distortion2); if (distortion2 == INT_MAX) @@ -905,43 +950,38 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rate2 += vp8_cost_mv_ref(this_mode, mdcounts); x->e_mbd.mode_info_context->mbmi.mv.as_int = mode_mv[this_mode].as_int; - - /* Exit early and don't compute the distortion if this macroblock - * is marked inactive. 
*/ - if (cpi->active_map_enabled && x->active_ptr[0] == 0) - { - sse = 0; - distortion2 = 0; - x->skip = 1; - break; - } - - if((this_mode != NEWMV) || - !(have_subp_search) || cpi->common.full_pixel==1) - distortion2 = get_inter_mbpred_error(x, - &cpi->fn_ptr[BLOCK_16X16], - &sse, mode_mv[this_mode]); - - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - - if (sse < x->encode_breakout) - { - // Check u and v to make sure skip is ok - int sse2 = 0; - - sse2 = VP8_UVSSE(x); - - if (sse2 * 2 < x->encode_breakout) - x->skip = 1; - else - x->skip = 0; - } + this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); break; default: break; } +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + // Store for later use by denoiser. + if (this_mode == ZEROMV && + x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) + { + zero_mv_sse = sse; + } + + // Store the best NEWMV in x for later use in the denoiser. + // We are restricted to the LAST_FRAME since the denoiser only keeps + // one filter state. 
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && + x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) + { + best_sse = sse; + x->e_mbd.best_sse_inter_mode = NEWMV; + x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; + x->e_mbd.need_to_clamp_best_mvs = + x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; + } + } +#endif + if (this_rd < best_rd || x->skip) { // Note index of best mode @@ -949,7 +989,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, *returnrate = rate2; *returndistortion = distortion2; - best_sse = sse; + best_rd_sse = sse; best_rd = this_rd; vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); @@ -1011,6 +1051,43 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, cpi->error_bins[this_rdbin] ++; } +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + if (x->e_mbd.best_sse_inter_mode == DC_PRED) { + // No best MV found. + x->e_mbd.best_sse_inter_mode = best_mbmode.mode; + x->e_mbd.best_sse_mv = best_mbmode.mv; + x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; + best_sse = best_rd_sse; + } + vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, + recon_yoffset, recon_uvoffset); + + // Reevaluate ZEROMV after denoising. 
+ if (best_mbmode.ref_frame == INTRA_FRAME) + { + int this_rd = 0; + rate2 = 0; + distortion2 = 0; + x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; + rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + this_mode = ZEROMV; + rate2 += vp8_cost_mv_ref(this_mode, mdcounts); + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; + this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x); + + if (this_rd < best_rd || x->skip) + { + vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, + sizeof(MB_MODE_INFO)); + } + } + } +#endif + if (cpi->is_src_frame_alt_ref && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index bb2b0ca71..8f575e498 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -33,11 +33,33 @@ #include "rdopt.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/systemdependent.h" +#if CONFIG_TEMPORAL_DENOISING +#include "denoising.h" +#endif extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); #define MAXF(a,b) (((a) > (b)) ? 
(a) : (b)) +typedef struct rate_distortion_struct +{ + int rate2; + int rate_y; + int rate_uv; + int distortion2; + int distortion_uv; +} RATE_DISTORTION; + +typedef struct best_mode_struct +{ + int yrd; + int rd; + int intra_rd; + MB_MODE_INFO mbmode; + union b_mode_info bmodes[16]; + PARTITION_INFO partition; +} BEST_MODE; + static const int auto_speed_thresh[17] = { 1000, @@ -741,7 +763,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, return INT_MAX; *Rate = cost; - *rate_y += tot_rate_y; + *rate_y = tot_rate_y; *Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); @@ -1327,7 +1349,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, } } -static __inline +static void vp8_cal_step_param(int sr, int *sp) { int step = 0; @@ -1711,6 +1733,181 @@ static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) } } +static int evaluate_inter_mode_rd(int mdcounts[4], + RATE_DISTORTION* rd, + int* disable_skip, + VP8_COMP *cpi, MACROBLOCK *x) +{ + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; + BLOCK *b = &x->block[0]; + MACROBLOCKD *xd = &x->e_mbd; + int distortion; + vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16); + + if (cpi->active_map_enabled && x->active_ptr[0] == 0) { + x->skip = 1; + } + else if (x->encode_breakout) + { + unsigned int sse; + unsigned int var; + int threshold = (xd->block[0].dequant[1] + * xd->block[0].dequant[1] >>4); + + if(threshold < x->encode_breakout) + threshold = x->encode_breakout; + + var = vp8_variance16x16 + (*(b->base_src), b->src_stride, + x->e_mbd.predictor, 16, &sse); + + if (sse < threshold) + { + unsigned int q2dc = xd->block[24].dequant[0]; + /* If theres is no codeable 2nd order dc + or a very small uniform pixel change change */ + if ((sse - var < q2dc * q2dc >>4) || + (sse /2 > var && sse-var < 64)) + { + // Check u and v to make sure skip is ok + int sse2= VP8_UVSSE(x); + if (sse2 * 2 < 
threshold) + { + x->skip = 1; + rd->distortion2 = sse + sse2; + rd->rate2 = 500; + + /* for best_yrd calculation */ + rd->rate_uv = 0; + rd->distortion_uv = sse2; + + *disable_skip = 1; + return RDCOST(x->rdmult, x->rddiv, rd->rate2, + rd->distortion2); + } + } + } + } + + + //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code + + // Add in the Mv/mode cost + rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); + + // Y cost and distortion + macro_block_yrd(x, &rd->rate_y, &distortion); + rd->rate2 += rd->rate_y; + rd->distortion2 += distortion; + + // UV cost and distortion + rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, + cpi->common.full_pixel); + rd->rate2 += rd->rate_uv; + rd->distortion2 += rd->distortion_uv; + return INT_MAX; +} + +static int calculate_final_rd_costs(int this_rd, + RATE_DISTORTION* rd, + int* other_cost, + int disable_skip, + int uv_intra_tteob, + int intra_rd_penalty, + VP8_COMP *cpi, MACROBLOCK *x) +{ + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; + // Where skip is allowable add in the default per mb cost for the no skip case. + // where we then decide to skip we have to delete this and replace it with the + // cost of signallying a skip + if (cpi->common.mb_no_coeff_skip) + { + *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); + rd->rate2 += *other_cost; + } + + /* Estimate the reference frame signaling cost and add it + * to the rolling cost variable. 
+ */ + rd->rate2 += + x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + + if (!disable_skip) + { + // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate + if (cpi->common.mb_no_coeff_skip) + { + int i; + int tteob; + int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED); + + tteob = 0; + if(has_y2_block) + tteob += x->e_mbd.eobs[24]; + + for (i = 0; i < 16; i++) + tteob += (x->e_mbd.eobs[i] > has_y2_block); + + if (x->e_mbd.mode_info_context->mbmi.ref_frame) + { + for (i = 16; i < 24; i++) + tteob += x->e_mbd.eobs[i]; + } + else + tteob += uv_intra_tteob; + + if (tteob == 0) + { + rd->rate2 -= (rd->rate_y + rd->rate_uv); + //for best_yrd calculation + rd->rate_uv = 0; + + // Back out no skip flag costing and add in skip flag costing + if (cpi->prob_skip_false) + { + int prob_skip_cost; + + prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); + prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0); + rd->rate2 += prob_skip_cost; + *other_cost += prob_skip_cost; + } + } + } + // Calculate the final RD estimate for this mode + this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); + if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame + == INTRA_FRAME) + this_rd += intra_rd_penalty; + } + return this_rd; +} + +static void update_best_mode(BEST_MODE* best_mode, int this_rd, + RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x) +{ + MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; + + other_cost += + x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + + /* Calculate the final y RD estimate for this mode */ + best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost), + (rd->distortion2-rd->distortion_uv)); + + best_mode->rd = this_rd; + vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_mode->partition, 
x->partition_info, sizeof(PARTITION_INFO)); + + if ((this_mode == B_PRED) || (this_mode == SPLITMV)) + { + int i; + for (i = 0; i < 16; i++) + { + best_mode->bmodes[i] = x->e_mbd.block[i].bmi; + } + } +} void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, @@ -1719,9 +1916,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; MACROBLOCKD *xd = &x->e_mbd; - union b_mode_info best_bmodes[16]; - MB_MODE_INFO best_mbmode; - PARTITION_INFO best_partition; int_mv best_ref_mv_sb[2]; int_mv mode_mv_sb[2][MB_MODE_COUNT]; int_mv best_ref_mv; @@ -1729,21 +1923,16 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, MB_PREDICTION_MODE this_mode; int num00; int best_mode_index = 0; + BEST_MODE best_mode; int i; int mode_index; int mdcounts[4]; int rate; - int distortion; - int best_rd = INT_MAX; - int best_intra_rd = INT_MAX; - int rate2, distortion2; + RATE_DISTORTION rd; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int uv_intra_tteob = 0; int uv_intra_done = 0; - int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); - int distortion_uv; - int best_yrd = INT_MAX; MB_PREDICTION_MODE uv_intra_mode = 0; int_mv mvp; @@ -1760,9 +1949,12 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; + best_mode.rd = INT_MAX; + best_mode.yrd = INT_MAX; + best_mode.intra_rd = INT_MAX; vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); + vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode)); + vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes)); /* Setup search priorities */ get_reference_search_order(cpi, ref_frame_map); @@ -1799,15 +1991,15 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, 
int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index]) + if (best_mode.rd <= cpi->rd_threshes[mode_index]) continue; if (this_ref_frame < 0) continue; // These variables hold are rolling total cost and distortion for this mode - rate2 = 0; - distortion2 = 0; + rd.rate2 = 0; + rd.distortion2 = 0; this_mode = vp8_mode_order[mode_index]; @@ -1907,16 +2099,17 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int tmp_rd; // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd); - rate2 += rate; - distortion2 += distortion; + int distortion; + tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd); + rd.rate2 += rate; + rd.distortion2 += distortion; - if(tmp_rd < best_yrd) + if(tmp_rd < best_mode.yrd) { - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; + rd.rate2 += uv_intra_rate; + rd.rate_uv = uv_intra_rate_tokenonly; + rd.distortion2 += uv_intra_distortion; + rd.distortion_uv = uv_intra_distortion; } else { @@ -1930,24 +2123,25 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, { int tmp_rd; int this_rd_thresh; + int distortion; this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3]; this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? 
cpi->rd_threshes[THR_NEW2] : this_rd_thresh; tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - best_yrd, mdcounts, - &rate, &rate_y, &distortion, this_rd_thresh) ; + best_mode.yrd, mdcounts, + &rate, &rd.rate_y, &distortion, this_rd_thresh) ; - rate2 += rate; - distortion2 += distortion; + rd.rate2 += rate; + rd.distortion2 += distortion; // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV - if (tmp_rd < best_yrd) + if (tmp_rd < best_mode.yrd) { // Now work out UV cost and add it in - rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); - rate2 += rate_uv; - distortion2 += distortion_uv; + rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); + rd.rate2 += rd.rate_uv; + rd.distortion2 += rd.distortion_uv; } else { @@ -1960,18 +2154,21 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, case V_PRED: case H_PRED: case TM_PRED: + { + int distortion; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_build_intra_predictors_mby (&x->e_mbd); - macro_block_yrd(x, &rate_y, &distortion) ; - rate2 += rate_y; - distortion2 += distortion; - rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - distortion_uv = uv_intra_distortion; - break; + macro_block_yrd(x, &rd.rate_y, &distortion) ; + rd.rate2 += rd.rate_y; + rd.distortion2 += distortion; + rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; + rd.rate2 += uv_intra_rate; + rd.rate_uv = uv_intra_rate_tokenonly; + rd.distortion2 += uv_intra_distortion; + rd.distortion_uv = uv_intra_distortion; + } + break; case NEWMV: { @@ -2114,7 +2311,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, mode_mv[NEWMV].as_int = d->bmi.mv.as_int; // Add the new motion vector cost to our rolling cost 
variable - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); + rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); } case NEARESTMV: @@ -2136,177 +2333,57 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, continue; vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]); - vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16); - - if (cpi->active_map_enabled && x->active_ptr[0] == 0) { - x->skip = 1; - } - else if (x->encode_breakout) - { - unsigned int sse; - unsigned int var; - int threshold = (xd->block[0].dequant[1] - * xd->block[0].dequant[1] >>4); - - if(threshold < x->encode_breakout) - threshold = x->encode_breakout; - - var = vp8_variance16x16 - (*(b->base_src), b->src_stride, - x->e_mbd.predictor, 16, &sse); - - if (sse < threshold) - { - unsigned int q2dc = xd->block[24].dequant[0]; - /* If theres is no codeable 2nd order dc - or a very small uniform pixel change change */ - if ((sse - var < q2dc * q2dc >>4) || - (sse /2 > var && sse-var < 64)) - { - // Check u and v to make sure skip is ok - int sse2= VP8_UVSSE(x); - if (sse2 * 2 < threshold) - { - x->skip = 1; - distortion2 = sse + sse2; - rate2 = 500; - - /* for best_yrd calculation */ - rate_uv = 0; - distortion_uv = sse2; - - disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - - break; - } - } - } - } - - - //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code - - // Add in the Mv/mode cost - rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - - // Y cost and distortion - macro_block_yrd(x, &rate_y, &distortion); - rate2 += rate_y; - distortion2 += distortion; - - // UV cost and distortion - rd_inter16x16_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); - rate2 += rate_uv; - distortion2 += distortion_uv; + this_rd = evaluate_inter_mode_rd(mdcounts, &rd, + &disable_skip, cpi, x); break; default: break; } - // Where skip is 
allowable add in the default per mb cost for the no skip case. - // where we then decide to skip we have to delete this and replace it with the - // cost of signallying a skip - if (cpi->common.mb_no_coeff_skip) - { - other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); - rate2 += other_cost; - } + this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, + disable_skip, uv_intra_tteob, + intra_rd_penalty, cpi, x); - /* Estimate the reference frame signaling cost and add it - * to the rolling cost variable. - */ - rate2 += - x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; - - if (!disable_skip) + // Keep record of best intra distortion + if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && + (this_rd < best_mode.intra_rd) ) { - // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate - if (cpi->common.mb_no_coeff_skip) - { - int tteob; - int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED); - - tteob = 0; - if(has_y2_block) - tteob += x->e_mbd.eobs[24]; - - for (i = 0; i < 16; i++) - tteob += (x->e_mbd.eobs[i] > has_y2_block); - - if (x->e_mbd.mode_info_context->mbmi.ref_frame) - { - for (i = 16; i < 24; i++) - tteob += x->e_mbd.eobs[i]; - } - else - tteob += uv_intra_tteob; - - if (tteob == 0) - { - rate2 -= (rate_y + rate_uv); - //for best_yrd calculation - rate_uv = 0; - - // Back out no skip flag costing and add in skip flag costing - if (cpi->prob_skip_false) - { - int prob_skip_cost; - - prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); - prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } - // Calculate the final RD estimate for this mode - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame - == INTRA_FRAME) - this_rd += intra_rd_penalty; + best_mode.intra_rd = this_rd; + 
*returnintra = rd.distortion2 ; } - // Keep record of best intra distortion - if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && - (this_rd < best_intra_rd) ) +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) { - best_intra_rd = this_rd; - *returnintra = distortion2 ; + // Store the best NEWMV in x for later use in the denoiser. + // We are restricted to the LAST_FRAME since the denoiser only keeps + // one filter state. + if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && + x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) + { + x->e_mbd.best_sse_inter_mode = NEWMV; + x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; + x->e_mbd.need_to_clamp_best_mvs = + x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; + } } +#endif // Did this mode help.. i.i is it the new best mode - if (this_rd < best_rd || x->skip) + if (this_rd < best_mode.rd || x->skip) { // Note index of best mode so far best_mode_index = mode_index; - + *returnrate = rd.rate2; + *returndistortion = rd.distortion2; if (this_mode <= B_PRED) { x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode; /* required for left and above block mv */ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; } - - other_cost += - x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost), - (distortion2-distortion_uv)); - - *returnrate = rate2; - *returndistortion = distortion2; - best_rd = this_rd; - vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); - - if ((this_mode == B_PRED) || (this_mode == SPLITMV)) - for (i = 0; i < 16; i++) - { - best_bmodes[i] = x->e_mbd.block[i].bmi; - } + update_best_mode(&best_mode, this_rd, &rd, other_cost, x); // Testing this mode gave rise to an improvement in best error score. 
Lower threshold a bit for next time @@ -2359,9 +2436,50 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, // Note how often each mode chosen as best cpi->mode_chosen_counts[best_mode_index] ++; +#if CONFIG_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity) + { + if (x->e_mbd.best_sse_inter_mode == DC_PRED) { + // No best MV found. + x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode; + x->e_mbd.best_sse_mv = best_mode.mbmode.mv; + x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; + } + + // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used? + vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0, + recon_yoffset, recon_uvoffset); + // Reevalute ZEROMV if the current mode is INTRA. + if (best_mode.mbmode.ref_frame == INTRA_FRAME) + { + int this_rd = INT_MAX; + int disable_skip = 0; + int other_cost = 0; + vpx_memset(&rd, 0, sizeof(rd)); + x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME; + rd.rate2 += x->ref_frame_cost[LAST_FRAME]; + rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts); + x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; + this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); + this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, + disable_skip, uv_intra_tteob, + intra_rd_penalty, cpi, x); + if (this_rd < best_mode.rd || x->skip) + { + // Note index of best mode so far + best_mode_index = mode_index; + *returnrate = rd.rate2; + *returndistortion = rd.distortion2; + update_best_mode(&best_mode, this_rd, &rd, other_cost, x); + } + } + } +#endif if (cpi->is_src_frame_alt_ref && - (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) + (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME)) { x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME; @@ -2370,26 
+2488,25 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip); x->e_mbd.mode_info_context->mbmi.partitioning = 0; - return; } // macroblock modes - vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); - if (best_mbmode.mode == B_PRED) + if (best_mode.mbmode.mode == B_PRED) { for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode; } - if (best_mbmode.mode == SPLITMV) + if (best_mode.mbmode.mode == SPLITMV) { for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].mv.as_int = best_bmodes[i].mv.as_int; + xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int; - vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); + vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO)); x->e_mbd.mode_info_context->mbmi.mv.as_int = x->partition_info->bmi[15].mv.as_int; diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 709f6e2b4..7e7def462 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -525,7 +525,8 @@ void vp8_temporal_filter_prepare_c { int which_buffer = start_frame - frame; struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead, - which_buffer); + which_buffer, + PEEK_FORWARD); cpi->frames[frames_to_blur-1-frame] = &buf->img; } diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 967b6026a..efe2b4826 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -482,7 +482,7 @@ void vp8_tokenize_initialize() } -static __inline void stuff2nd_order_b +static void stuff2nd_order_b ( TOKENEXTRA **tp, ENTROPY_CONTEXT *a, @@ -506,7 +506,7 @@ static __inline void stuff2nd_order_b } -static __inline void 
stuff1st_order_b +static void stuff1st_order_b ( TOKENEXTRA **tp, ENTROPY_CONTEXT *a, @@ -530,7 +530,7 @@ static __inline void stuff1st_order_b *a = *l = pt; } -static __inline +static void stuff1st_order_buv ( TOKENEXTRA **tp, diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h index 0aa19431c..48574f33c 100644 --- a/vp8/encoder/treewriter.h +++ b/vp8/encoder/treewriter.h @@ -42,7 +42,7 @@ typedef BOOL_CODER vp8_writer; /* Both of these return bits, not scaled bits. */ -static __inline unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p) +static unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p) { /* Imitate existing calculation */ @@ -53,7 +53,7 @@ static __inline unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob /* Small functions to write explicit values and tokens, as well as estimate their lengths. */ -static __inline void vp8_treed_write +static void vp8_treed_write ( vp8_writer *const w, vp8_tree t, @@ -72,7 +72,7 @@ static __inline void vp8_treed_write } while (n); } -static __inline void vp8_write_token +static void vp8_write_token ( vp8_writer *const w, vp8_tree t, @@ -83,7 +83,7 @@ static __inline void vp8_write_token vp8_treed_write(w, t, p, x->value, x->Len); } -static __inline int vp8_treed_cost( +static int vp8_treed_cost( vp8_tree t, const vp8_prob *const p, int v, @@ -103,7 +103,7 @@ static __inline int vp8_treed_cost( return c; } -static __inline int vp8_cost_token +static int vp8_cost_token ( vp8_tree t, const vp8_prob *const p, diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index f68d007c1..3403557e9 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -79,6 +79,7 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c +VP8_COMMON_SRCS-$(CONFIG_POSTPROC) 
+= common/mfqe.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm @@ -112,6 +113,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm +VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm endif diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index fa78ec31c..683194a1d 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -183,14 +183,20 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); - +#if CONFIG_TEMPORAL_DENOISING + RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 1); +#endif #if !(CONFIG_REALTIME_ONLY) RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING); +#if !(CONFIG_TEMPORAL_DENOISING) RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); +#endif #else RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING); +#if !(CONFIG_TEMPORAL_DENOISING) RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0); #endif +#endif RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index aad847a72..2e940d787 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -47,6 +47,8 @@ VP8_CX_SRCS-yes += encoder/firstpass.c VP8_CX_SRCS-yes += encoder/block.h VP8_CX_SRCS-yes += encoder/boolhuff.h VP8_CX_SRCS-yes += encoder/bitstream.h +VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.h +VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.c VP8_CX_SRCS-yes += encoder/encodeintra.h VP8_CX_SRCS-yes += encoder/encodemb.h VP8_CX_SRCS-yes += 
encoder/encodemv.h |