summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--vp8/common/loopfilter.c20
-rw-r--r--vp8/common/loopfilter.h5
-rw-r--r--vp8/encoder/arm/arm_csystemdependent.c12
-rw-r--r--vp8/encoder/arm/neon/picklpf_arm.c40
-rw-r--r--vp8/encoder/arm/neon/vp8_memcpy_neon.asm8
-rw-r--r--vp8/encoder/generic/csystemdependent.c6
-rw-r--r--vp8/encoder/picklpf.c73
7 files changed, 85 insertions, 79 deletions
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index fe0644bdd..a38b49eb9 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -506,7 +506,8 @@ void vp8_loop_filter_partial_frame
unsigned char *y_ptr;
int mb_row;
int mb_col;
- int mb_cols = post->y_width >> 4;
+ int mb_cols = post->y_width >> 4;
+ int mb_rows = post->y_height >> 4;
int linestocopy, i;
@@ -521,15 +522,9 @@ void vp8_loop_filter_partial_frame
int lvl_seg[MAX_MB_SEGMENTS];
- mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
-
- /* 3 is a magic number. 4 is probably magic too */
- linestocopy = (post->y_height >> (4 + 3));
-
- if (linestocopy < 1)
- linestocopy = 1;
-
- linestocopy <<= 4;
+ /* number of MB rows to use in partial filtering */
+ linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
+ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
/* Note the baseline filter values for each segment */
/* See vp8_loop_filter_frame_init. Rather than call that for each change
@@ -554,8 +549,9 @@ void vp8_loop_filter_partial_frame
}
}
- /* Set up the buffer pointers */
- y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
+ /* Set up the buffer pointers; partial image starts at ~middle of frame */
+ y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
+ mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
/* vp8_filter each macro block */
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index 9887cf55b..340339a91 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -15,7 +15,10 @@
#include "vpx_ports/mem.h"
#include "vpx_config.h"
-#define MAX_LOOP_FILTER 63
+#define MAX_LOOP_FILTER 63
+/* fraction of total macroblock rows to be used in fast filter level picking */
+/* has to be > 2 */
+#define PARTIAL_FRAME_FRACTION 8
typedef enum
{
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 210a5a5c5..918d7d9ae 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -14,9 +14,9 @@
#include "vp8/encoder/variance.h"
#include "vp8/encoder/onyx_int.h"
-extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
{
@@ -123,15 +123,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;
cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon;
}
-#endif
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
if (flags & HAS_NEON)
#endif
{
- vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+ vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame_neon;
}
#endif
-#endif
}
diff --git a/vp8/encoder/arm/neon/picklpf_arm.c b/vp8/encoder/arm/neon/picklpf_arm.c
index 3fb370c3d..6610d2dc2 100644
--- a/vp8/encoder/arm/neon/picklpf_arm.c
+++ b/vp8/encoder/arm/neon/picklpf_arm.c
@@ -8,20 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vp8/common/loopfilter.h"
+#include "vpx_scale/yv12config.h"
-#include "vp8/common/onyxc_int.h"
-#include "vp8/encoder/onyx_int.h"
-#include "vp8/encoder/quantize.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/yv12extend.h"
-#include "vpx_scale/vpxscale.h"
-#include "vp8/common/alloccommon.h"
+extern void vp8_memcpy_partial_neon(unsigned char *dst_ptr,
+ unsigned char *src_ptr,
+ int sz);
-extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
-
-void
-vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
+void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc)
{
unsigned char *src_y, *dst_y;
int yheight;
@@ -34,17 +30,19 @@ vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG
yheight = src_ybc->y_height;
ystride = src_ybc->y_stride;
- linestocopy = (yheight >> (Fraction + 4));
-
- if (linestocopy < 1)
- linestocopy = 1;
-
- linestocopy <<= 4;
-
- yoffset = ystride * ((yheight >> 5) * 16 - 8);
+ /* number of MB rows to use in partial filtering */
+ linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
+ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
+
+ /* Copy extra 4 so that full filter context is available if filtering done
+ * on the copied partial frame and not original. Partial filter does mb
+ * filtering for top row also, which can modify3 pixels above.
+ */
+ linestocopy += 4;
+ /* partial image starts at ~middle of frame (macroblock border) */
+ yoffset = ystride * (((yheight >> 5) * 16) - 4);
src_y = src_ybc->y_buffer + yoffset;
dst_y = dst_ybc->y_buffer + yoffset;
- //vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16));
- vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride *(linestocopy + 16)));
+ vp8_memcpy_partial_neon(dst_y, src_y, ystride * linestocopy);
}
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index b0450e523..5b9f11e59 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -9,7 +9,7 @@
;
- EXPORT |vp8_memcpy_neon|
+ EXPORT |vp8_memcpy_partial_neon|
ARM
REQUIRE8
@@ -17,8 +17,10 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
-;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
-|vp8_memcpy_neon| PROC
+;this is not a full memcpy function!!!
+;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
+; int sz);
+|vp8_memcpy_partial_neon| PROC
;pld [r1] ;preload pred data
;pld [r1, #128]
;pld [r1, #256]
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 28526f322..1a6fce9e1 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -17,8 +17,10 @@
void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc);
void vp8_cmachine_specific_config(VP8_COMP *cpi)
{
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index e01c59e2e..c1e5f7797 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -29,12 +29,11 @@ extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
#define IF_RTCD(x) NULL
#endif
-extern void
-(*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc,
- int Fraction);
-void
-vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc);
+
+void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc)
{
unsigned char *src_y, *dst_y;
int yheight;
@@ -47,21 +46,26 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
yheight = src_ybc->y_height;
ystride = src_ybc->y_stride;
- linestocopy = (yheight >> (Fraction + 4));
-
- if (linestocopy < 1)
- linestocopy = 1;
-
- linestocopy <<= 4;
-
- yoffset = ystride * ((yheight >> 5) * 16 - 8);
+ /* number of MB rows to use in partial filtering */
+ linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
+ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
+
+ /* Copy extra 4 so that full filter context is available if filtering done
+ * on the copied partial frame and not original. Partial filter does mb
+ * filtering for top row also, which can modify3 pixels above.
+ */
+ linestocopy += 4;
+ /* partial image starts at ~middle of frame (macroblock border)*/
+ yoffset = ystride * (((yheight >> 5) * 16) - 4);
src_y = src_ybc->y_buffer + yoffset;
dst_y = dst_ybc->y_buffer + yoffset;
- vpx_memcpy(dst_y, src_y, ystride *(linestocopy + 16));
+ vpx_memcpy(dst_y, src_y, ystride * linestocopy);
}
-static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd)
+static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
+ YV12_BUFFER_CONFIG *dest,
+ const vp8_variance_rtcd_vtable_t *rtcd)
{
int i, j;
int Total = 0;
@@ -69,17 +73,16 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
unsigned char *src = source->y_buffer;
unsigned char *dst = dest->y_buffer;
- int linestocopy = (source->y_height >> (Fraction + 4));
- (void)rtcd;
-
- if (linestocopy < 1)
- linestocopy = 1;
+ int linestocopy;
- linestocopy <<= 4;
+ /* number of MB rows to use in partial filtering */
+ linestocopy = (source->y_height >> 4) / PARTIAL_FRAME_FRACTION;
+ linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
- srcoffset = source->y_stride * (dest->y_height >> 5) * 16;
- dstoffset = dest->y_stride * (dest->y_height >> 5) * 16;
+ /* partial image starts at ~middle of frame (macroblock border)*/
+ srcoffset = source->y_stride * ((dest->y_height >> 5) * 16);
+ dstoffset = dest->y_stride * ((dest->y_height >> 5) * 16);
src += srcoffset;
dst += dstoffset;
@@ -90,7 +93,9 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
- Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride,
+ dst + j, dest->y_stride,
+ &sse);
}
src += 16 * source->y_stride;
@@ -105,7 +110,8 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
{
int min_filter_level;
- if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame)
+ if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame &&
+ !cpi->common.refresh_alt_ref_frame)
min_filter_level = 0;
else
{
@@ -148,7 +154,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
int best_filt_val = cm->filter_level;
// Make a copy of the unfiltered / processed recon buffer
- vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf, 3);
+ vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
if (cm->frame_type == KEY_FRAME)
cm->sharpness_level = 0;
@@ -173,10 +179,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
// Get the err using the previous frame's filter value.
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
- best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ best_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame
- vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
+ vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
@@ -187,11 +193,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame
- vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
-
+ vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
// Update the best case record or exit loop.
if (filt_err < best_err)
@@ -220,10 +225,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
// Get the err for filtered frame
- filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
+ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
// Re-instate the unfiltered frame
- vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
+ vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
// Update the best case record or exit loop.
if (filt_err < best_err)