summaryrefslogtreecommitdiff
path: root/vp9/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common')
-rw-r--r--vp9/common/arm/neon/vp9_reconintra_neon.asm304
-rw-r--r--vp9/common/mips/dspr2/vp9_common_dspr2.h8
-rw-r--r--vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h8
-rw-r--r--vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h8
-rw-r--r--vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h8
-rw-r--r--vp9/common/vp9_alloccommon.c40
-rw-r--r--vp9/common/vp9_alloccommon.h8
-rw-r--r--vp9/common/vp9_blockd.h14
-rw-r--r--vp9/common/vp9_common.h8
-rw-r--r--vp9/common/vp9_common_data.h8
-rw-r--r--vp9/common/vp9_convolve.c32
-rw-r--r--vp9/common/vp9_convolve.h8
-rw-r--r--vp9/common/vp9_entropy.h8
-rw-r--r--vp9/common/vp9_entropymode.c11
-rw-r--r--vp9/common/vp9_entropymode.h10
-rw-r--r--vp9/common/vp9_entropymv.h8
-rw-r--r--vp9/common/vp9_enums.h8
-rw-r--r--vp9/common/vp9_filter.c15
-rw-r--r--vp9/common/vp9_filter.h27
-rw-r--r--vp9/common/vp9_idct.c68
-rw-r--r--vp9/common/vp9_idct.h8
-rw-r--r--vp9/common/vp9_loopfilter.c40
-rw-r--r--vp9/common/vp9_loopfilter.h44
-rw-r--r--vp9/common/vp9_mv.h8
-rw-r--r--vp9/common/vp9_mvref_common.h12
-rw-r--r--vp9/common/vp9_onyx.h12
-rw-r--r--vp9/common/vp9_onyxc_int.h69
-rw-r--r--vp9/common/vp9_postproc.h8
-rw-r--r--vp9/common/vp9_ppflags.h8
-rw-r--r--vp9/common/vp9_pragmas.h8
-rw-r--r--vp9/common/vp9_pred_common.h10
-rw-r--r--vp9/common/vp9_prob.h8
-rw-r--r--vp9/common/vp9_quant_common.h8
-rw-r--r--vp9/common/vp9_reconinter.c15
-rw-r--r--vp9/common/vp9_reconinter.h15
-rw-r--r--vp9/common/vp9_reconintra.h8
-rw-r--r--vp9/common/vp9_rtcd_defs.sh4
-rw-r--r--vp9/common/vp9_scale.h8
-rw-r--r--vp9/common/vp9_scan.h8
-rw-r--r--vp9/common/vp9_seg_common.h8
-rw-r--r--vp9/common/vp9_systemdependent.h8
-rw-r--r--vp9/common/vp9_textblit.h8
-rw-r--r--vp9/common/vp9_tile_common.h8
-rw-r--r--vp9/common/x86/vp9_asm_stubs.c529
-rw-r--r--vp9/common/x86/vp9_idct_intrin_sse2.c102
-rw-r--r--vp9/common/x86/vp9_postproc_x86.h8
46 files changed, 897 insertions, 684 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm
index 71bf24c9f..279f678b1 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.asm
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm
@@ -18,6 +18,8 @@
EXPORT |vp9_h_predictor_32x32_neon|
EXPORT |vp9_tm_predictor_4x4_neon|
EXPORT |vp9_tm_predictor_8x8_neon|
+ EXPORT |vp9_tm_predictor_16x16_neon|
+ EXPORT |vp9_tm_predictor_32x32_neon|
ARM
REQUIRE8
PRESERVE8
@@ -346,61 +348,289 @@ loop_h
ldrb r12, [r12]
vdup.u8 d0, r12
+ ; preload 8 left
+ vld1.8 d30, [r3]
+
; Load above 8 pixels
vld1.64 {d2}, [r2]
+ vmovl.u8 q10, d30
+
; Compute above - ytop_left
vsubl.u8 q3, d2, d0
; Load left row by row and compute left + (above - ytop_left)
; 1st row and 2nd row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
- vst1.64 {d0}, [r0], r1
- vst1.64 {d1}, [r0], r1
+ vdup.16 q0, d20[0]
+ vdup.16 q1, d20[1]
+ vadd.s16 q0, q3, q0
+ vadd.s16 q1, q3, q1
; 3rd row and 4th row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
+ vdup.16 q8, d20[2]
+ vdup.16 q9, d20[3]
+ vadd.s16 q8, q3, q8
+ vadd.s16 q9, q3, q9
+
+ vqshrun.s16 d0, q0, #0
+ vqshrun.s16 d1, q1, #0
+ vqshrun.s16 d2, q8, #0
+ vqshrun.s16 d3, q9, #0
+
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
+ vst1.64 {d2}, [r0], r1
+ vst1.64 {d3}, [r0], r1
; 5th row and 6th row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
- vst1.64 {d0}, [r0], r1
- vst1.64 {d1}, [r0], r1
+ vdup.16 q0, d21[0]
+ vdup.16 q1, d21[1]
+ vadd.s16 q0, q3, q0
+ vadd.s16 q1, q3, q1
+
+ ; 7th row and 8th row
+ vdup.16 q8, d21[2]
+ vdup.16 q9, d21[3]
+ vadd.s16 q8, q3, q8
+ vadd.s16 q9, q3, q9
+
+ vqshrun.s16 d0, q0, #0
+ vqshrun.s16 d1, q1, #0
+ vqshrun.s16 d2, q8, #0
+ vqshrun.s16 d3, q9, #0
- ; 7rd row and 8th row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqshrun.s16 d0, q1, #0
- vqshrun.s16 d1, q2, #0
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
+ vst1.64 {d2}, [r0], r1
+ vst1.64 {d3}, [r0], r1
+
bx lr
ENDP ; |vp9_tm_predictor_8x8_neon|
+;void vp9_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above (reused as the pass counter after the loads)
+; r3 const uint8_t *left
+; dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]), r and c in 0..15.
+; Reads only above[-1..15] and left[0..15]; clobbers r12, q0-q3, q8-q11.
+
+|vp9_tm_predictor_16x16_neon| PROC
+ ; Load ytop_left = above[-1];
+ sub r12, r2, #1
+ ldrb r12, [r12]
+ vdup.u8 q0, r12
+
+ ; Load above 16 pixels
+ vld1.8 q1, [r2]
+
+ ; Load all 16 left pixels once: d18 = left[0..7], d19 = left[8..15]
+ vld1.8 q9, [r3]
+
+ ; Compute above - ytop_left
+ vsubl.u8 q2, d2, d0
+ vsubl.u8 q3, d3, d1
+
+ vmovl.u8 q10, d18 ; q10 = left[0..7] widened to 16 bits
+
+ ; Compute left + (above - ytop_left) row by row.
+ ; Process 8 rows in each single loop and loop 2 times to process 16 rows.
+ mov r2, #2
+
+loop_16x16_neon
+ ; Process two rows.
+ vdup.16 q0, d20[0]
+ vdup.16 q8, d20[1]
+ vadd.s16 q1, q0, q2
+ vadd.s16 q0, q0, q3
+ vadd.s16 q11, q8, q2
+ vadd.s16 q8, q8, q3
+ vqshrun.s16 d2, q1, #0
+ vqshrun.s16 d3, q0, #0
+ vqshrun.s16 d22, q11, #0
+ vqshrun.s16 d23, q8, #0
+ vdup.16 q0, d20[2] ; preload next 2 rows data
+ vdup.16 q8, d20[3]
+ vst1.64 {d2,d3}, [r0], r1
+ vst1.64 {d22,d23}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q1, q0, q2
+ vadd.s16 q0, q0, q3
+ vadd.s16 q11, q8, q2
+ vadd.s16 q8, q8, q3
+ vqshrun.s16 d2, q1, #0
+ vqshrun.s16 d3, q0, #0
+ vqshrun.s16 d22, q11, #0
+ vqshrun.s16 d23, q8, #0
+ vdup.16 q0, d21[0] ; preload next 2 rows data
+ vdup.16 q8, d21[1]
+ vst1.64 {d2,d3}, [r0], r1
+ vst1.64 {d22,d23}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q1, q0, q2
+ vadd.s16 q0, q0, q3
+ vadd.s16 q11, q8, q2
+ vadd.s16 q8, q8, q3
+ vqshrun.s16 d2, q1, #0
+ vqshrun.s16 d3, q0, #0
+ vqshrun.s16 d22, q11, #0
+ vqshrun.s16 d23, q8, #0
+ vdup.16 q0, d21[2] ; preload next 2 rows data
+ vdup.16 q8, d21[3]
+ vst1.64 {d2,d3}, [r0], r1
+ vst1.64 {d22,d23}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q1, q0, q2
+ vadd.s16 q0, q0, q3
+ vadd.s16 q11, q8, q2
+ vadd.s16 q8, q8, q3
+ vqshrun.s16 d2, q1, #0
+ vqshrun.s16 d3, q0, #0
+ vqshrun.s16 d22, q11, #0
+ vqshrun.s16 d23, q8, #0
+ vmovl.u8 q10, d19 ; q10 = left[8..15] for the 2nd pass (unused after it)
+ vst1.64 {d2,d3}, [r0], r1
+ vst1.64 {d22,d23}, [r0], r1
+
+ subs r2, r2, #1
+ bgt loop_16x16_neon
+
+ bx lr
+ ENDP ; |vp9_tm_predictor_16x16_neon|
+
+;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
+; const uint8_t *above,
+; const uint8_t *left)
+; r0 uint8_t *dst
+; r1 ptrdiff_t y_stride
+; r2 const uint8_t *above (reused as the pass counter after the loads)
+; r3 const uint8_t *left
+; dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]), r and c in 0..31.
+; Reads only above[-1..31] and left[0..31]; clobbers r12, q0-q3, q8-q15.
+
+|vp9_tm_predictor_32x32_neon| PROC
+ ; Load ytop_left = above[-1];
+ sub r12, r2, #1
+ ldrb r12, [r12]
+ vdup.u8 q0, r12
+
+ ; Load above 32 pixels
+ vld1.8 q1, [r2]!
+ vld1.8 q2, [r2]
+
+ ; Compute above - ytop_left
+ vsubl.u8 q8, d2, d0
+ vsubl.u8 q9, d3, d1
+ vsubl.u8 q10, d4, d0
+ vsubl.u8 q11, d5, d1
+
+ ; Load left row by row and compute left + (above - ytop_left)
+ ; Process 8 rows in each single loop and loop 4 times to process 32 rows.
+ mov r2, #4
+
+loop_32x32_neon
+ ; Load the 8 left pixels for this pass and widen them to 16 bits. Loading
+ ; here rather than at the end of the previous pass never reads left[32..].
+ vld1.8 d0, [r3]!
+ vmovl.u8 q3, d0
+
+ ; Process two rows.
+ vdup.16 q0, d6[0]
+ vdup.16 q2, d6[1]
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q1, d6[2]
+ vdup.16 q2, d6[3]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q1, q8
+ vadd.s16 q13, q1, q9
+ vadd.s16 q14, q1, q10
+ vadd.s16 q15, q1, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q0, d7[0]
+ vdup.16 q2, d7[1]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vdup.16 q0, d7[2]
+ vdup.16 q2, d7[3]
+ vst1.64 {d24-d27}, [r0], r1
+
+ ; Process two rows.
+ vadd.s16 q12, q0, q8
+ vadd.s16 q13, q0, q9
+ vadd.s16 q14, q0, q10
+ vadd.s16 q15, q0, q11
+ vqshrun.s16 d0, q12, #0
+ vqshrun.s16 d1, q13, #0
+ vadd.s16 q12, q2, q8
+ vadd.s16 q13, q2, q9
+ vqshrun.s16 d2, q14, #0
+ vqshrun.s16 d3, q15, #0
+ vadd.s16 q14, q2, q10
+ vadd.s16 q15, q2, q11
+ vst1.64 {d0-d3}, [r0], r1
+ vqshrun.s16 d24, q12, #0
+ vqshrun.s16 d25, q13, #0
+ vqshrun.s16 d26, q14, #0
+ vqshrun.s16 d27, q15, #0
+ vst1.64 {d24-d27}, [r0], r1
+
+ subs r2, r2, #1
+ bgt loop_32x32_neon
+
+ bx lr
+ ENDP ; |vp9_tm_predictor_32x32_neon|
+
END
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index e9c698119..991d3c2b3 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -17,6 +17,10 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#if HAVE_DSPR2
#define CROP_WIDTH 512
extern uint8_t *vp9_ff_cropTbl;
@@ -114,4 +118,8 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
int w, int h);
#endif // #if HAVE_DSPR2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_MIPS_DSPR2_VP9_COMMON_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
index 98bfcfaf2..008cf8cac 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
@@ -17,6 +17,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
@@ -752,4 +756,8 @@ static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
*oq6 = res_oq6;
}
#endif // #if HAVE_DSPR2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
index 4cb2ebb46..ca01a6a10 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
@@ -17,6 +17,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#if HAVE_DSPR2
#define STORE_F0() { \
__asm__ __volatile__ ( \
@@ -467,4 +471,8 @@
}
#endif // #if HAVE_DSPR2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
index b9e0aca90..5b0d9cc9b 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
@@ -17,6 +17,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#if HAVE_DSPR2
/* processing 4 pixels at the same time
* compute hev and mask in the same function */
@@ -362,4 +366,8 @@ static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
*flat2 = flat1;
}
#endif // #if HAVE_DSPR2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index ca42090c1..e033fbb99 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -33,8 +33,8 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
void vp9_free_frame_buffers(VP9_COMMON *cm) {
int i;
- for (i = 0; i < cm->fb_count; i++)
- vp9_free_frame_buffer(&cm->yv12_fb[i]);
+ for (i = 0; i < FRAME_BUFFERS; i++)
+ vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
vp9_free_frame_buffer(&cm->post_proc_buffer);
@@ -85,7 +85,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
int mi_size;
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
- VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
+ VP9_DEC_BORDER_IN_PIXELS) < 0)
goto fail;
set_mb_mi(cm, aligned_width, aligned_height);
@@ -137,33 +137,21 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
const int ss_y = cm->subsampling_y;
int mi_size;
- if (cm->fb_count == 0) {
- cm->fb_count = FRAME_BUFFERS;
- CHECK_MEM_ERROR(cm, cm->yv12_fb,
- vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
- CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt,
- vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt)));
- if (cm->fb_lru) {
- CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru,
- vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_lru)));
- }
- }
-
vp9_free_frame_buffers(cm);
- for (i = 0; i < cm->fb_count; i++) {
- cm->fb_idx_ref_cnt[i] = 0;
- if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
- VP9_ENC_BORDER_IN_PIXELS) < 0)
+ for (i = 0; i < FRAME_BUFFERS; i++) {
+ cm->frame_bufs[i].ref_count = 0;
+ if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
+ ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
- cm->new_fb_idx = cm->fb_count - 1;
- cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
+ cm->new_fb_idx = FRAME_BUFFERS - 1;
+ cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
for (i = 0; i < REF_FRAMES; i++) {
cm->ref_frame_map[i] = i;
- cm->fb_idx_ref_cnt[i] = 1;
+ cm->frame_bufs[i].ref_count = 1;
}
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
@@ -211,14 +199,6 @@ void vp9_create_common(VP9_COMMON *cm) {
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
-
- vpx_free(cm->yv12_fb);
- vpx_free(cm->fb_idx_ref_cnt);
- vpx_free(cm->fb_idx_ref_lru);
-
- cm->yv12_fb = NULL;
- cm->fb_idx_ref_cnt = NULL;
- cm->fb_idx_ref_lru = NULL;
}
void vp9_initialize_common() {
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index cf8dca573..e3b5b95d8 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -14,6 +14,10 @@
#include "vp9/common/vp9_onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_initialize_common();
void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
@@ -28,4 +32,8 @@ void vp9_free_frame_buffers(VP9_COMMON *cm);
void vp9_update_frame_size(VP9_COMMON *cm);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index ad78b0dc4..49e336aa4 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -25,8 +25,12 @@
#include "vp9/common/vp9_scale.h"
#include "vp9/common/vp9_seg_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define BLOCK_SIZE_GROUPS 4
-#define MBSKIP_CONTEXTS 3
+#define SKIP_CONTEXTS 3
#define INTER_MODE_CONTEXTS 7
/* Segment Feature Masks */
@@ -131,7 +135,7 @@ typedef struct {
// Flags used for prediction status of various bit-stream signals
unsigned char seg_id_predicted;
- INTERPOLATION_TYPE interp_filter;
+ INTERP_FILTER interp_filter;
BLOCK_SIZE sb_type;
} MB_MODE_INFO;
@@ -248,7 +252,7 @@ typedef struct macroblockd {
/* Inverse transform function pointers. */
void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
- struct subpix_fn_table subpix;
+ const interp_kernel *interp_kernel;
int corrupted;
@@ -463,4 +467,8 @@ static int get_tx_eob(const struct segmentation *seg, int segment_id,
return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_BLOCKD_H_
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 36d1cdf14..69964dae8 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -19,6 +19,10 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
@@ -91,4 +95,8 @@ static int get_unsigned_bits(unsigned int num_values) {
#define VP9_FRAME_MARKER 0x2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_COMMON_H_
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index 5222d29c1..f41962747 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -13,6 +13,10 @@
#include "vp9/common/vp9_enums.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern const int b_width_log2_lookup[BLOCK_SIZES];
extern const int b_height_log2_lookup[BLOCK_SIZES];
extern const int mi_width_log2_lookup[BLOCK_SIZES];
@@ -28,4 +32,8 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_COMMON_DATA_H_
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 6edf7eaca..b105a57bc 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -20,7 +20,7 @@
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *x_filters,
+ const interp_kernel *x_filters,
int x0_q4, int x_step_q4, int w, int h) {
int x, y;
src -= SUBPEL_TAPS / 2 - 1;
@@ -42,7 +42,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *x_filters,
+ const interp_kernel *x_filters,
int x0_q4, int x_step_q4, int w, int h) {
int x, y;
src -= SUBPEL_TAPS / 2 - 1;
@@ -65,7 +65,7 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *y_filters,
+ const interp_kernel *y_filters,
int y0_q4, int y_step_q4, int w, int h) {
int x, y;
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -88,7 +88,7 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *y_filters,
+ const interp_kernel *y_filters,
int y0_q4, int y_step_q4, int w, int h) {
int x, y;
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -112,9 +112,9 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
static void convolve(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *const x_filters,
+ const interp_kernel *const x_filters,
int x0_q4, int x_step_q4,
- const subpel_kernel *const y_filters,
+ const interp_kernel *const y_filters,
int y0_q4, int y_step_q4,
int w, int h) {
// Fixed size intermediate buffer places limits on parameters.
@@ -138,14 +138,14 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
y_filters, y0_q4, y_step_q4, w, h);
}
-static const subpel_kernel *get_filter_base(const int16_t *filter) {
+static const interp_kernel *get_filter_base(const int16_t *filter) {
// NOTE: This assumes that the filter table is 256-byte aligned.
// TODO(agrange) Modify to make independent of table alignment.
- return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
+ return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}
-static int get_filter_offset(const int16_t *f, const subpel_kernel *base) {
- return (const subpel_kernel *)(intptr_t)f - base;
+static int get_filter_offset(const int16_t *f, const interp_kernel *base) {
+ return (const interp_kernel *)(intptr_t)f - base;
}
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
@@ -153,7 +153,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
+ const interp_kernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -165,7 +165,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
+ const interp_kernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
@@ -177,7 +177,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
+ const interp_kernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
convolve_vert(src, src_stride, dst, dst_stride, filters_y,
y0_q4, y_step_q4, w, h);
@@ -188,7 +188,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
+ const interp_kernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
y0_q4, y_step_q4, w, h);
@@ -199,10 +199,10 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
+ const interp_kernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
+ const interp_kernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
convolve(src, src_stride, dst, dst_stride,
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 29d499063..6bf71fc79 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -13,10 +13,18 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_CONVOLVE_H_
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index ba162fd20..e030d92ec 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -18,6 +18,10 @@
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_entropymode.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define DIFF_UPDATE_PROB 252
// Coefficient token alphabet
@@ -184,4 +188,8 @@ static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
}
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ENTROPY_H_
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 83281b2ea..6def3c869 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -303,7 +303,7 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
-static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
+static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64
};
@@ -325,7 +325,7 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p);
vp9_copy(cm->fc.single_ref_prob, default_single_ref_p);
cm->fc.tx_probs = default_tx_probs;
- vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
+ vp9_copy(cm->fc.skip_probs, default_skip_probs);
vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs);
}
@@ -385,7 +385,7 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
- if (cm->mcomp_filter_type == SWITCHABLE) {
+ if (cm->interp_filter == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
counts->switchable_interp[i], fc->switchable_interp_prob[i]);
@@ -415,9 +415,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
}
}
- for (i = 0; i < MBSKIP_CONTEXTS; ++i)
- fc->mbskip_probs[i] = adapt_prob(pre_fc->mbskip_probs[i],
- counts->mbskip[i]);
+ for (i = 0; i < SKIP_CONTEXTS; ++i)
+ fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]);
}
static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 5312553c7..deec3f652 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -13,12 +13,14 @@
#include "vp9/common/vp9_blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define TX_SIZE_CONTEXTS 2
#define SWITCHABLE_FILTERS 3 // number of switchable filters
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
-// #define MODE_STATS
-
struct VP9Common;
struct tx_probs {
@@ -57,4 +59,8 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ENTROPYMODE_H_
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 48cb82db1..7e1f1479b 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -15,6 +15,10 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9Common;
void vp9_init_mv_probs(struct VP9Common *cm);
@@ -121,4 +125,8 @@ typedef struct {
void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ENTROPYMV_H_
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 34411a34f..e96e76947 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -13,6 +13,10 @@
#include "./vpx_config.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MI_SIZE_LOG2 3
#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6
@@ -90,4 +94,8 @@ typedef enum {
SRGB = 7 // RGB
} COLOR_SPACE;
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ENUMS_H_
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 79ace147c..dbde6d551 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -14,7 +14,7 @@
#include "vp9/common/vp9_filter.h"
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
@@ -35,7 +35,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
};
// Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
@@ -56,7 +56,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
};
// DCT based filter
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
@@ -77,7 +77,7 @@ DECLARE_ALIGNED(256, const subpel_kernel,
};
// freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const subpel_kernel,
+DECLARE_ALIGNED(256, const interp_kernel,
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
@@ -98,14 +98,15 @@ DECLARE_ALIGNED(256, const subpel_kernel,
};
-static const subpel_kernel* vp9_filter_kernels[4] = {
+static const interp_kernel* vp9_filter_kernels[4] = {
vp9_sub_pel_filters_8,
vp9_sub_pel_filters_8lp,
vp9_sub_pel_filters_8s,
vp9_bilinear_filters
};
-const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type) {
- return vp9_filter_kernels[type];
+const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
+ assert(filter != SWITCHABLE);
+ return vp9_filter_kernels[filter];
}
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index b1e7e6499..b611e304c 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -14,6 +14,10 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define FILTER_BITS 7
#define SUBPEL_BITS 4
@@ -27,25 +31,24 @@ typedef enum {
EIGHTTAP_SHARP = 2,
BILINEAR = 3,
SWITCHABLE = 4 /* should be the last one */
-} INTERPOLATION_TYPE;
-
-typedef int16_t subpel_kernel[SUBPEL_TAPS];
+} INTERP_FILTER;
-struct subpix_fn_table {
- const subpel_kernel *filter_x;
- const subpel_kernel *filter_y;
-};
+typedef int16_t interp_kernel[SUBPEL_TAPS];
-const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type);
+const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter);
-extern const subpel_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
+extern const interp_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
#define BILINEAR_FILTERS_2TAP(x) \
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 533f7f361..20b78bfed 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
}
}
-static void idct4_1d(const int16_t *input, int16_t *output) {
+static void idct4(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;
// stage 1
@@ -124,7 +124,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
for (i = 0; i < 4; ++i) {
- idct4_1d(input, outptr);
+ idct4(input, outptr);
input += 4;
outptr += 4;
}
@@ -133,7 +133,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- idct4_1d(temp_in, temp_out);
+ idct4(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * stride + i]);
@@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
}
}
-static void idct8_1d(const int16_t *input, int16_t *output) {
+static void idct8(const int16_t *input, int16_t *output) {
int16_t step1[8], step2[8];
int temp1, temp2;
// stage 1
@@ -174,7 +174,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) {
step1[6] = dct_const_round_shift(temp2);
// stage 2 & stage 3 - even half
- idct4_1d(step1, step1);
+ idct4(step1, step1);
// stage 2 - odd half
step2[4] = step1[4] + step1[5];
@@ -209,7 +209,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 8; ++i) {
- idct8_1d(input, outptr);
+ idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -218,7 +218,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- idct8_1d(temp_in, temp_out);
+ idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
@@ -238,7 +238,7 @@ void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void iadst4_1d(const int16_t *input, int16_t *output) {
+static void iadst4(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[0];
@@ -283,10 +283,10 @@ static void iadst4_1d(const int16_t *input, int16_t *output) {
void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
const transform_2d IHT_4[] = {
- { idct4_1d, idct4_1d }, // DCT_DCT = 0
- { iadst4_1d, idct4_1d }, // ADST_DCT = 1
- { idct4_1d, iadst4_1d }, // DCT_ADST = 2
- { iadst4_1d, iadst4_1d } // ADST_ADST = 3
+ { idct4, idct4 }, // DCT_DCT = 0
+ { iadst4, idct4 }, // ADST_DCT = 1
+ { idct4, iadst4 }, // DCT_ADST = 2
+ { iadst4, iadst4 } // ADST_ADST = 3
};
int i, j;
@@ -311,7 +311,7 @@ void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
+ dest[j * stride + i]);
}
}
-static void iadst8_1d(const int16_t *input, int16_t *output) {
+static void iadst8(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0 = input[7];
@@ -389,10 +389,10 @@ static void iadst8_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_8[] = {  // pairs of 8-point 1-D inverse transforms; presumably indexed by tx_type (values in the row comments)
- { idct8_1d, idct8_1d }, // DCT_DCT = 0
- { iadst8_1d, idct8_1d }, // ADST_DCT = 1
- { idct8_1d, iadst8_1d }, // DCT_ADST = 2
- { iadst8_1d, iadst8_1d } // ADST_ADST = 3
+ { idct8, idct8 }, // DCT_DCT = 0
+ { iadst8, idct8 }, // ADST_DCT = 1
+ { idct8, iadst8 }, // DCT_ADST = 2
+ { iadst8, iadst8 } // ADST_ADST = 3
};
void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -430,7 +430,7 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
// only first 4 row has non-zero coefs
for (i = 0; i < 4; ++i) {
- idct8_1d(input, outptr);
+ idct8(input, outptr);
input += 8;
outptr += 8;
}
@@ -439,14 +439,14 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- idct8_1d(temp_in, temp_out);
+ idct8(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
-static void idct16_1d(const int16_t *input, int16_t *output) {
+static void idct16(const int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -619,7 +619,7 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows
for (i = 0; i < 16; ++i) {
- idct16_1d(input, outptr);
+ idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -628,14 +628,14 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- idct16_1d(temp_in, temp_out);
+ idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -807,10 +807,10 @@ static void iadst16_1d(const int16_t *input, int16_t *output) {
}
static const transform_2d IHT_16[] = {  // pairs of 16-point 1-D inverse transforms; presumably indexed by tx_type (values in the row comments)
- { idct16_1d, idct16_1d }, // DCT_DCT = 0
- { iadst16_1d, idct16_1d }, // ADST_DCT = 1
- { idct16_1d, iadst16_1d }, // DCT_ADST = 2
- { iadst16_1d, iadst16_1d } // ADST_ADST = 3
+ { idct16, idct16 }, // DCT_DCT = 0
+ { iadst16, idct16 }, // ADST_DCT = 1
+ { idct16, iadst16 }, // DCT_ADST = 2
+ { iadst16, iadst16 } // ADST_ADST = 3
};
void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
@@ -848,7 +848,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) {
- idct16_1d(input, outptr);
+ idct16(input, outptr);
input += 16;
outptr += 16;
}
@@ -857,7 +857,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i];
- idct16_1d(temp_in, temp_out);
+ idct16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -877,7 +877,7 @@ void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
}
}
-static void idct32_1d(const int16_t *input, int16_t *output) {
+static void idct32(const int16_t *input, int16_t *output) {
int16_t step1[32], step2[32];
int temp1, temp2;
@@ -1263,7 +1263,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
- idct32_1d(input, outptr);
+ idct32(input, outptr);
else
vpx_memset(outptr, 0, sizeof(int16_t) * 32);
input += 32;
@@ -1274,7 +1274,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- idct32_1d(temp_in, temp_out);
+ idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
@@ -1290,7 +1290,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
// Rows
// only upper-left 8x8 has non-zero coeff
for (i = 0; i < 8; ++i) {
- idct32_1d(input, outptr);
+ idct32(input, outptr);
input += 32;
outptr += 32;
}
@@ -1299,7 +1299,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- idct32_1d(temp_in, temp_out);
+ idct32(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 183c50abf..ceca7951b 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -18,6 +18,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
// Constants and Macros used by all idct/dct functions
#define DCT_CONST_BITS 14
@@ -103,4 +107,8 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_IDCT_H_
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2266e0ec2..dd304c909 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,26 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
-// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block. Above_ entries refer to whether or not to apply a
-// filter on the above border. Int_ entries refer to whether or not to
-// apply borders on the 4x4 edges within the 8x8 block that each bit
-// represents.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
- uint64_t left_y[TX_SIZES];
- uint64_t above_y[TX_SIZES];
- uint64_t int_4x4_y;
- uint16_t left_uv[TX_SIZES];
- uint16_t above_uv[TX_SIZES];
- uint16_t int_4x4_uv;
- uint8_t lfl_y[64];
- uint8_t lfl_uv[16];
-} LOOP_FILTER_MASK;
-
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
@@ -638,9 +618,9 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
-static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi_8x8, const int mode_info_stride,
- LOOP_FILTER_MASK *lfm) {
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm) {
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
MODE_INFO **mip = mi_8x8;
@@ -1069,10 +1049,10 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
}
#endif
-static void filter_block_plane(VP9_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row,
- LOOP_FILTER_MASK *lfm) {
+void vp9_filter_block_plane(VP9_COMMON *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t* const dst0 = dst->buf;
int r, c;
@@ -1244,14 +1224,14 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
#if CONFIG_NON420
if (use_420)
#endif
- setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
- &lfm);
+ vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
+ cm->mode_info_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
#if CONFIG_NON420
if (use_420)
#endif
- filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+ vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
#if CONFIG_NON420
else
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 98fac96ff..668e898cf 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -17,6 +17,10 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_seg_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MAX_LOOP_FILTER 63
#define MAX_SHARPNESS 7
@@ -56,9 +60,42 @@ typedef struct {
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
} loop_filter_info_n;
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border. Int_ entries refer to whether or not to
+// apply a filter on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+ uint64_t left_y[TX_SIZES];  // 64 bits: one per 8x8 block of the 64x64 region
+ uint64_t above_y[TX_SIZES];
+ uint64_t int_4x4_y;  // a single mask, not split per transform size
+ uint16_t left_uv[TX_SIZES];  // 16-bit masks; presumably for the subsampled chroma planes - TODO confirm
+ uint16_t above_uv[TX_SIZES];
+ uint16_t int_4x4_uv;
+ uint8_t lfl_y[64];  // one byte per bit of the _y masks; presumably per-block filter levels - TODO confirm
+ uint8_t lfl_uv[16];  // one byte per bit of the _uv masks
+} LOOP_FILTER_MASK;
+
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
+struct VP9LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp9_setup_mask(struct VP9Common *const cm,
+ const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm);
+
+void vp9_filter_block_plane(struct VP9Common *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm);
void vp9_loop_filter_init(struct VP9Common *cm);
@@ -86,8 +123,15 @@ typedef struct LoopFilterWorkerData {
int start;
int stop;
int y_only;
+
+ struct VP9LfSyncData *lf_sync;
+ int num_lf_workers;
} LFWorkerData;
// Operates on the rows described by LFWorkerData passed as 'arg1'.
int vp9_loop_filter_worker(void *arg1, void *arg2);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 155c3f12e..98fd1d82f 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -15,6 +15,10 @@
#include "vp9/common/vp9_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef struct mv {
int16_t row;
int16_t col;
@@ -36,4 +40,8 @@ static void clamp_mv(MV *mv, int min_col, int max_col,
mv->row = clamp(mv->row, min_row, max_row);
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_MV_H_
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index cd89390d5..0936abfcd 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -7,12 +7,16 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
+#define VP9_COMMON_VP9_MVREF_COMMON_H_
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_blockd.h"
-#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
-#define VP9_COMMON_VP9_MVREF_COMMON_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
@@ -56,4 +60,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
int_mv *nearest, int_mv *near);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 45d798482..564e4195f 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -11,17 +11,16 @@
#ifndef VP9_COMMON_VP9_ONYX_H_
#define VP9_COMMON_VP9_ONYX_H_
-#ifdef __cplusplus
-extern "C"
-{ // NOLINT
-#endif
-
#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vpx_scale/yv12config.h"
#include "vp9/common/vp9_ppflags.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MAX_SEGMENTS 8
typedef int *VP9_PTR;
@@ -56,6 +55,7 @@ extern "C"
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5,
+ MODE_REALTIME = 0x6,
} MODE;
typedef enum {
@@ -237,7 +237,7 @@ extern "C"
int vp9_get_quantizer(VP9_PTR c);
#ifdef __cplusplus
-}
+} // extern "C"
#endif
#endif // VP9_COMMON_VP9_ONYX_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index f6fe4d3f1..d92a25b12 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -25,6 +25,10 @@
#include "vp9/common/vp9_postproc.h"
#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define REFS_PER_FRAME 3
#define REF_FRAMES_LOG2 3
@@ -56,7 +60,7 @@ typedef struct frame_contexts {
vp9_prob single_ref_prob[REF_CONTEXTS][2];
vp9_prob comp_ref_prob[REF_CONTEXTS];
struct tx_probs tx_probs;
- vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
+ vp9_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
} FRAME_CONTEXT;
@@ -75,7 +79,7 @@ typedef struct {
unsigned int single_ref[REF_CONTEXTS][2][2];
unsigned int comp_ref[REF_CONTEXTS][2];
struct tx_counts tx;
- unsigned int mbskip[MBSKIP_CONTEXTS][2];
+ unsigned int skip[SKIP_CONTEXTS][2];
nmv_context_counts mv;
} FRAME_COUNTS;
@@ -87,6 +91,12 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
+
+typedef struct {
+ int ref_count;  // 0 means the slot is free; get_free_fb() sets it to 1 when it claims the slot
+ YV12_BUFFER_CONFIG buf;  // the frame buffer that ref_count tracks
+} RefCntBuffer;
+
typedef struct VP9Common {
struct vpx_internal_error_info error;
@@ -113,8 +123,8 @@ typedef struct VP9Common {
YV12_BUFFER_CONFIG *frame_to_show;
- YV12_BUFFER_CONFIG *yv12_fb;
- int *fb_idx_ref_cnt; /* reference counts */
+ RefCntBuffer frame_bufs[FRAME_BUFFERS];
+
int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
// TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
@@ -180,7 +190,7 @@ typedef struct VP9Common {
// Persistent mb segment id map used in prediction.
unsigned char *last_frame_seg_map;
- INTERPOLATION_TYPE mcomp_filter_type;
+ INTERP_FILTER interp_filter;
loop_filter_info_n lf_info;
@@ -213,55 +223,32 @@ typedef struct VP9Common {
int frame_parallel_decoding_mode;
int log2_tile_cols, log2_tile_rows;
-
- vpx_codec_frame_buffer_t *fb_list; // External frame buffers
- int fb_count; // Total number of frame buffers
- vpx_realloc_frame_buffer_cb_fn_t realloc_fb_cb;
- void *user_priv; // Private data associated with the external frame buffers.
-
- int fb_lru; // Flag telling if lru is on/off
- uint32_t *fb_idx_ref_lru; // Frame buffer lru cache
- uint32_t fb_idx_ref_lru_count;
} VP9_COMMON;
static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {  // returns the frame buffer selected by cm->new_fb_idx
- return &cm->yv12_fb[cm->new_fb_idx];
+ return &cm->frame_bufs[cm->new_fb_idx].buf;  // the buffer is now the .buf member of a frame_bufs[] entry
}
static int get_free_fb(VP9_COMMON *cm) {  // claims the first buffer whose ref_count is 0 and returns its index
int i;
- uint32_t lru_count = cm->fb_idx_ref_lru_count + 1;
- int free_buffer_idx = cm->fb_count;
- for (i = 0; i < cm->fb_count; i++) {
- if (!cm->fb_lru) {
- if (cm->fb_idx_ref_cnt[i] == 0) {
- free_buffer_idx = i;
- break;
- }
- } else {
- if (cm->fb_idx_ref_cnt[i] == 0 && cm->fb_idx_ref_lru[i] < lru_count) {
- free_buffer_idx = i;
- lru_count = cm->fb_idx_ref_lru[i];
- }
- }
- }
+ for (i = 0; i < FRAME_BUFFERS; i++)  // linear scan; stops at the lowest unreferenced index
+ if (cm->frame_bufs[i].ref_count == 0)
+ break;
- assert(free_buffer_idx < cm->fb_count);
- cm->fb_idx_ref_cnt[free_buffer_idx] = 1;
- if (cm->fb_lru)
- cm->fb_idx_ref_lru[free_buffer_idx] = ++cm->fb_idx_ref_lru_count;
- return free_buffer_idx;
+ assert(i < FRAME_BUFFERS);  // the loop leaves i == FRAME_BUFFERS when every buffer is still referenced
+ cm->frame_bufs[i].ref_count = 1;  // NOTE(review): with asserts compiled out and no free buffer, i == FRAME_BUFFERS here
+ return i;
}
-static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
+static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {  // repoints *idx at new_idx, moving one reference from the old buffer to the new one
const int ref_index = *idx;
- if (ref_index >= 0 && buf[ref_index] > 0)
- buf[ref_index]--;
+ if (ref_index >= 0 && bufs[ref_index].ref_count > 0)  // a negative old index is skipped; the count never goes below 0
+ bufs[ref_index].ref_count--;
*idx = new_idx;
- buf[new_idx]++;
+ bufs[new_idx].ref_count++;  // new_idx is used unchecked, so it must be a valid index
}
static int mi_cols_aligned_to_sb(int n_mis) {
@@ -359,4 +346,8 @@ static INLINE int partition_plane_context(
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h
index b8a456fdb..b07d5d045 100644
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -15,6 +15,10 @@
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_ppflags.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct postproc_state {
int last_q;
int last_noise;
@@ -33,4 +37,8 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_POSTPROC_H_
diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h
index 561c93028..8168935fc 100644
--- a/vp9/common/vp9_ppflags.h
+++ b/vp9/common/vp9_ppflags.h
@@ -11,6 +11,10 @@
#ifndef VP9_COMMON_VP9_PPFLAGS_H_
#define VP9_COMMON_VP9_PPFLAGS_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
enum {
VP9D_NOFILTERING = 0,
VP9D_DEBLOCK = 1 << 0,
@@ -35,4 +39,8 @@ typedef struct {
int display_mv_flag;
} vp9_ppflags_t;
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_PPFLAGS_H_
diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h
index f079161d6..0efc713ca 100644
--- a/vp9/common/vp9_pragmas.h
+++ b/vp9/common/vp9_pragmas.h
@@ -11,6 +11,10 @@
#ifndef VP9_COMMON_VP9_PRAGMAS_H_
#define VP9_COMMON_VP9_PRAGMAS_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#ifdef __INTEL_COMPILER
#pragma warning(disable:997 1011 170)
#endif
@@ -19,4 +23,8 @@
#pragma warning(disable:4799)
#endif
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_PRAGMAS_H_
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 23722ba72..0acee32f8 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -14,6 +14,10 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_onyxc_int.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) {
return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL;
}
@@ -50,7 +54,7 @@ static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.mbskip_probs[vp9_get_skip_context(xd)];
+ return cm->fc.skip_probs[vp9_get_skip_context(xd)];
}
int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
@@ -129,4 +133,8 @@ static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
}
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h
index 7a790c542..cc8d8ab38 100644
--- a/vp9/common/vp9_prob.h
+++ b/vp9/common/vp9_prob.h
@@ -18,6 +18,10 @@
#include "vp9/common/vp9_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef uint8_t vp9_prob;
#define MAX_PROB 255
@@ -109,4 +113,8 @@ static void tree_merge_probs(const vp9_tree_index *tree,
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_PROB_H_
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index 83f2fb655..af50e23cd 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -13,6 +13,10 @@
#include "vp9/common/vp9_blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MINQ 0
#define MAXQ 255
#define QINDEX_RANGE (MAXQ - MINQ + 1)
@@ -25,4 +29,8 @@ int16_t vp9_ac_quant(int qindex, int delta);
int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_QUANT_COMMON_H_
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index b5a9248c3..d554cc0ed 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -69,13 +69,11 @@ static void inter_predictor(const uint8_t *src, int src_stride,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
int xs, int ys) {
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
- subpix->filter_x[subpel_x], xs,
- subpix->filter_y[subpel_y], ys,
- w, h);
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
}
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
@@ -83,7 +81,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
const MV *src_mv,
const struct scale_factors *sf,
int w, int h, int ref,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
enum mv_precision precision,
int x, int y) {
const int is_q4 = precision == MV_PRECISION_Q4;
@@ -96,7 +94,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, ref, subpix, sf->x_step_q4, sf->y_step_q4);
+ sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4);
}
static INLINE int round_mv_comp_q4(int value) {
@@ -198,7 +196,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+ (scaled_mv.col >> SUBPEL_BITS);
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+ subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel,
+ xs, ys);
}
}
@@ -367,7 +366,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
}
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, &xd->subpix, xs, ys);
+ subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys);
}
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 3cc16d94e..3345d83e8 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -14,7 +14,10 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_onyxc_int.h"
-struct subpix_fn_table;
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
@@ -32,7 +35,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
const MV *mv_q3,
const struct scale_factors *sf,
int w, int h, int do_avg,
- const struct subpix_fn_table *subpix,
+ const interp_kernel *kernel,
enum mv_precision precision,
int x, int y);
@@ -90,10 +93,8 @@ static void setup_pre_planes(MACROBLOCKD *xd, int idx,
}
}
-static void set_scale_factors(VP9_COMMON *cm, MACROBLOCKD *xd,
- int ref0, int ref1) {
- xd->block_refs[0] = &cm->frame_refs[ref0 >= 0 ? ref0 : 0];
- xd->block_refs[1] = &cm->frame_refs[ref1 >= 0 ? ref1 : 0];
-}
+#ifdef __cplusplus
+} // extern "C"
+#endif
#endif // VP9_COMMON_VP9_RECONINTER_H_
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index fc916fcf3..800736d30 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -14,9 +14,17 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, int mode,
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int aoff, int loff, int plane);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index e384032f4..04a40bd58 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -135,7 +135,7 @@ prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const ui
specialize vp9_v_predictor_16x16 $sse2_x86inc neon
prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_16x16 $sse2_x86inc
+specialize vp9_tm_predictor_16x16 $sse2_x86inc neon
prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2
@@ -174,7 +174,7 @@ prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const ui
specialize vp9_v_predictor_32x32 $sse2_x86inc neon
prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_32x32 $sse2_x86_64
+specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon
prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_predictor_32x32 $sse2_x86inc
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 55b4d8888..90b0d0bf9 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -14,6 +14,10 @@
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_convolve.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define REF_SCALE_SHIFT 14
#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
#define REF_INVALID_SCALE -1
@@ -46,4 +50,8 @@ static int vp9_is_scaled(const struct scale_factors *sf) {
sf->y_scale_fp != REF_NO_SCALE;
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_SCALE_H_
diff --git a/vp9/common/vp9_scan.h b/vp9/common/vp9_scan.h
index efab48bfc..9613b675c 100644
--- a/vp9/common/vp9_scan.h
+++ b/vp9/common/vp9_scan.h
@@ -17,6 +17,10 @@
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_blockd.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MAX_NEIGHBORS 2
void vp9_init_neighbors();
@@ -36,4 +40,8 @@ static INLINE int get_coef_context(const int16_t *neighbors,
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_SCAN_H_
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 8ff54fb73..ff2d66a36 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -13,6 +13,10 @@
#include "vp9/common/vp9_prob.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
@@ -70,5 +74,9 @@ int vp9_get_segdata(const struct segmentation *seg,
extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_SEG_COMMON_H_
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index 6f955ab56..ee9a4823b 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -11,6 +11,10 @@
#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#ifdef _MSC_VER
#include <math.h>
#define snprintf _snprintf
@@ -72,4 +76,8 @@ static INLINE int get_msb(unsigned int n) {
struct VP9Common;
void vp9_machine_specific_config(struct VP9Common *cm);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
diff --git a/vp9/common/vp9_textblit.h b/vp9/common/vp9_textblit.h
index c968628fe..158ec1b37 100644
--- a/vp9/common/vp9_textblit.h
+++ b/vp9/common/vp9_textblit.h
@@ -11,9 +11,17 @@
#ifndef VP9_COMMON_VP9_TEXTBLIT_H_
#define VP9_COMMON_VP9_TEXTBLIT_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_blit_text(const char *msg, unsigned char *address, int pitch);
void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
int pitch);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_TEXTBLIT_H_
diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h
index a09876e4b..a97719e29 100644
--- a/vp9/common/vp9_tile_common.h
+++ b/vp9/common/vp9_tile_common.h
@@ -11,6 +11,10 @@
#ifndef VP9_COMMON_VP9_TILE_COMMON_H_
#define VP9_COMMON_VP9_TILE_COMMON_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9Common;
typedef struct TileInfo {
@@ -26,4 +30,8 @@ void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm,
void vp9_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index f95423678..8a2297feb 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -23,6 +23,68 @@ typedef void filter8_1dfunction (
const short *filter
);
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ if (step_q4 == 16 && filter[3] != 128) { \
+ while (w >= 16) { \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
+ dst, dst_stride, \
+ h, filter); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } \
+ if (w) { \
+ vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h); \
+ } \
+}
+
+#define FUN_CONV_2D(avg, opt) \
+void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
+ \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ if (x_step_q4 == 16 && y_step_q4 == 16) { \
+ vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h + 7); \
+ vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h); \
+ } else { \
+ vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
+ } \
+}
+
#if HAVE_SSSE3
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
@@ -37,201 +99,44 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Ensure the filter can be compressed to int16_t. */
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
-
-void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- } else {
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
+// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+ ssse3);
+
+// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, ssse3);
+FUN_CONV_2D(avg_ , ssse3);
#endif
#if HAVE_SSE2
@@ -248,199 +153,41 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
-void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Ensure the filter can be compressed to int16_t. */
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (x_step_q4 == 16 && filter_x[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_h8_avg_sse2(src, src_stride,
- dst, dst_stride,
- h, filter_x);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- if (y_step_q4 == 16 && filter_y[3] != 128) {
- while (w >= 16) {
- vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 16;
- dst += 16;
- w -= 16;
- }
- while (w >= 8) {
- vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 8;
- dst += 8;
- w -= 8;
- }
- while (w >= 4) {
- vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride,
- dst, dst_stride,
- h, filter_y);
- src += 4;
- dst += 4;
- w -= 4;
- }
- }
- if (w) {
- vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- }
-}
-
-void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- vp9_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
-
-void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
-
- assert(w <= 64);
- assert(h <= 64);
- if (x_step_q4 == 16 && y_step_q4 == 16) {
- vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + 7);
- vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
- } else {
- vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- }
-}
+// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
+
+// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, sse2);
+FUN_CONV_2D(avg_ , sse2);
#endif
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 2f6149464..13a5b5a82 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -180,7 +180,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
}
-static void idct4_1d_sse2(__m128i *in) {
+static void idct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
@@ -216,7 +216,7 @@ static void idct4_1d_sse2(__m128i *in) {
in[1] = _mm_shuffle_epi32(in[1], 0x4E);
}
-static void iadst4_1d_sse2(__m128i *in) {
+static void iadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
@@ -276,20 +276,20 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct4_1d_sse2(in);
- idct4_1d_sse2(in);
+ idct4_sse2(in);
+ idct4_sse2(in);
break;
case 1: // ADST_DCT
- idct4_1d_sse2(in);
- iadst4_1d_sse2(in);
+ idct4_sse2(in);
+ iadst4_sse2(in);
break;
case 2: // DCT_ADST
- iadst4_1d_sse2(in);
- idct4_1d_sse2(in);
+ iadst4_sse2(in);
+ idct4_sse2(in);
break;
case 3: // ADST_ADST
- iadst4_1d_sse2(in);
- iadst4_1d_sse2(in);
+ iadst4_sse2(in);
+ iadst4_sse2(in);
break;
default:
assert(0);
@@ -455,7 +455,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
res1 = _mm_packs_epi32(tmp2, tmp3); \
}
-#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) \
{ \
/* Stage1 */ \
@@ -573,7 +573,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
}
@@ -674,7 +674,7 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
}
-static void idct8_1d_sse2(__m128i *in) {
+static void idct8_sse2(__m128i *in) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
@@ -695,11 +695,11 @@ static void idct8_1d_sse2(__m128i *in) {
in0, in1, in2, in3, in4, in5, in6, in7);
// 4-stage 1D idct8x8
- IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]);
}
-static void iadst8_1d_sse2(__m128i *in) {
+static void iadst8_sse2(__m128i *in) {
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
@@ -946,20 +946,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct8_1d_sse2(in);
- idct8_1d_sse2(in);
+ idct8_sse2(in);
+ idct8_sse2(in);
break;
case 1: // ADST_DCT
- idct8_1d_sse2(in);
- iadst8_1d_sse2(in);
+ idct8_sse2(in);
+ iadst8_sse2(in);
break;
case 2: // DCT_ADST
- iadst8_1d_sse2(in);
- idct8_1d_sse2(in);
+ iadst8_sse2(in);
+ idct8_sse2(in);
break;
case 3: // ADST_ADST
- iadst8_1d_sse2(in);
- iadst8_1d_sse2(in);
+ iadst8_sse2(in);
+ iadst8_sse2(in);
break;
default:
assert(0);
@@ -1104,7 +1104,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
- IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero,
+ IDCT8(in0, in1, in2, in3, zero, zero, zero, zero,
in0, in1, in2, in3, in4, in5, in6, in7);
// Final rounding and shift
in0 = _mm_adds_epi16(in0, final_rounding);
@@ -1135,7 +1135,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE(dest, in7);
}
-#define IDCT16_1D \
+#define IDCT16 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
@@ -1264,7 +1264,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-#define IDCT16_10_1D \
+#define IDCT16_10 \
/* Stage2 */ \
{ \
const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \
@@ -1437,7 +1437,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in, in);
array_transpose_8x8(in+8, in+8);
- IDCT16_1D
+ IDCT16
// Stage7
curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1465,7 +1465,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(l+i*8, in);
array_transpose_8x8(r+i*8, in+8);
- IDCT16_1D
+ IDCT16
// 2-D
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -1590,7 +1590,7 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
res0[15] = tbuf[7];
}
-static void iadst16_1d_8col(__m128i *in) {
+static void iadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2060,7 +2060,7 @@ static void iadst16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-static void idct16_1d_8col(__m128i *in) {
+static void idct16_8col(__m128i *in) {
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
@@ -2404,16 +2404,16 @@ static void idct16_1d_8col(__m128i *in) {
in[15] = _mm_sub_epi16(s[0], s[15]);
}
-static void idct16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void idct16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
- idct16_1d_8col(in0);
- idct16_1d_8col(in1);
+ idct16_8col(in0);
+ idct16_8col(in1);
}
-static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) {
+static void iadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
- iadst16_1d_8col(in0);
- iadst16_1d_8col(in1);
+ iadst16_8col(in0);
+ iadst16_8col(in1);
}
static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
@@ -2502,20 +2502,20 @@ void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case 0: // DCT_DCT
- idct16_1d_sse2(in0, in1);
- idct16_1d_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
break;
case 1: // ADST_DCT
- idct16_1d_sse2(in0, in1);
- iadst16_1d_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
break;
case 2: // DCT_ADST
- iadst16_1d_sse2(in0, in1);
- idct16_1d_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
break;
case 3: // ADST_ADST
- iadst16_1d_sse2(in0, in1);
- iadst16_1d_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
break;
default:
assert(0);
@@ -2732,7 +2732,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 2; i++) {
array_transpose_4X8(l + 8*i, in);
- IDCT16_10_1D
+ IDCT16_10
// Stage7
in[0] = _mm_add_epi16(stp2_0, stp1_15);
@@ -2814,7 +2814,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
input += 8; \
} \
-#define IDCT32_1D_34 \
+#define IDCT32_34 \
/* Stage1 */ \
{ \
const __m128i zero = _mm_setzero_si128();\
@@ -3115,7 +3115,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
}
-#define IDCT32_1D \
+#define IDCT32 \
/* Stage1 */ \
{ \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \
@@ -3554,7 +3554,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
- IDCT32_1D
+ IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3593,7 +3593,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col+i*8, in);
- IDCT32_1D_34
+ IDCT32_34
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3922,7 +3922,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(in+16, in+16);
array_transpose_8x8(in+24, in+24);
- IDCT32_1D
+ IDCT32
// 1_D: Store 32 intermediate results for each 8x32 block.
col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
@@ -3969,7 +3969,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
array_transpose_8x8(col+j+64, in+16);
array_transpose_8x8(col+j+96, in+24);
- IDCT32_1D
+ IDCT32
// 2_D: Calculate the results and store them to destination.
in[0] = _mm_add_epi16(stp1_0, stp1_31);
diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h
index 8870215a2..cab9d34f2 100644
--- a/vp9/common/x86/vp9_postproc_x86.h
+++ b/vp9/common/x86/vp9_postproc_x86.h
@@ -12,6 +12,10 @@
#ifndef VP9_COMMON_X86_VP9_POSTPROC_X86_H_
#define VP9_COMMON_X86_VP9_POSTPROC_X86_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Note:
*
* This platform is commonly built for runtime CPU detection. If you modify
@@ -61,4 +65,8 @@ extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt);
#endif
#endif
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_COMMON_X86_VP9_POSTPROC_X86_H_