diff options
Diffstat (limited to 'vp9/common')
46 files changed, 897 insertions, 684 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm index 71bf24c9f..279f678b1 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.asm +++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm @@ -18,6 +18,8 @@ EXPORT |vp9_h_predictor_32x32_neon| EXPORT |vp9_tm_predictor_4x4_neon| EXPORT |vp9_tm_predictor_8x8_neon| + EXPORT |vp9_tm_predictor_16x16_neon| + EXPORT |vp9_tm_predictor_32x32_neon| ARM REQUIRE8 PRESERVE8 @@ -346,61 +348,289 @@ loop_h ldrb r12, [r12] vdup.u8 d0, r12 + ; preload 8 left + vld1.8 d30, [r3] + ; Load above 8 pixels vld1.64 {d2}, [r2] + vmovl.u8 q10, d30 + ; Compute above - ytop_left vsubl.u8 q3, d2, d0 ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 + vdup.16 q0, d20[0] + vdup.16 q1, d20[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 + vdup.16 q8, d20[2] + vdup.16 q9, d20[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqshrun.s16 d0, q0, #0 + vqshrun.s16 d1, q1, #0 + vqshrun.s16 d2, q8, #0 + vqshrun.s16 d3, q9, #0 + vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 ; 5th row and 6th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 + vdup.16 q0, d21[0] + vdup.16 q1, d21[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + ; 7th row and 8th row + vdup.16 q8, d21[2] + vdup.16 q9, d21[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqshrun.s16 d0, q0, #0 + vqshrun.s16 d1, q1, #0 + vqshrun.s16 d2, q8, #0 + vqshrun.s16 d3, q9, #0 - ; 7rd row and 8th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqshrun.s16 d0, q1, #0 - vqshrun.s16 d1, q2, #0 vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + bx lr ENDP ; |vp9_tm_predictor_8x8_neon| +;void vp9_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vp9_tm_predictor_16x16_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + ldrb r12, [r12] + vdup.u8 q0, r12 + + ; Load above 8 pixels + vld1.8 q1, [r2] + + ; preload 8 left into r12 + vld1.8 d18, [r3]! + + ; Compute above - ytop_left + vsubl.u8 q2, d2, d0 + vsubl.u8 q3, d3, d1 + + vmovl.u8 q10, d18 + + ; Load left row by row and compute left + (above - ytop_left) + ; Process 8 rows in each single loop and loop 2 times to process 16 rows. + mov r2, #2 + +loop_16x16_neon + ; Process two rows. + vdup.16 q0, d20[0] + vdup.16 q8, d20[1] + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqshrun.s16 d2, q1, #0 + vqshrun.s16 d3, q0, #0 + vqshrun.s16 d22, q11, #0 + vqshrun.s16 d23, q8, #0 + vdup.16 q0, d20[2] ; proload next 2 rows data + vdup.16 q8, d20[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + ; Process two rows. + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqshrun.s16 d2, q1, #0 + vqshrun.s16 d3, q0, #0 + vqshrun.s16 d22, q11, #0 + vqshrun.s16 d23, q8, #0 + vdup.16 q0, d21[0] ; proload next 2 rows data + vdup.16 q8, d21[1] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqshrun.s16 d2, q1, #0 + vqshrun.s16 d3, q0, #0 + vqshrun.s16 d22, q11, #0 + vqshrun.s16 d23, q8, #0 + vdup.16 q0, d21[2] ; proload next 2 rows data + vdup.16 q8, d21[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqshrun.s16 d2, q1, #0 + vqshrun.s16 d3, q0, #0 + vqshrun.s16 d22, q11, #0 + vqshrun.s16 d23, q8, #0 + vdup.16 q0, d20[2] + vdup.16 q8, d20[3] + vld1.8 d18, [r3]! ; preload 8 left into r12 + vmovl.u8 q10, d18 + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + subs r2, r2, #1 + bgt loop_16x16_neon + + bx lr + ENDP ; |vp9_tm_predictor_16x16_neon| + +;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vp9_tm_predictor_32x32_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + ldrb r12, [r12] + vdup.u8 q0, r12 + + ; Load above 32 pixels + vld1.8 q1, [r2]! + vld1.8 q2, [r2] + + ; preload 8 left pixels + vld1.8 d26, [r3]! + + ; Compute above - ytop_left + vsubl.u8 q8, d2, d0 + vsubl.u8 q9, d3, d1 + vsubl.u8 q10, d4, d0 + vsubl.u8 q11, d5, d1 + + vmovl.u8 q3, d26 + + ; Load left row by row and compute left + (above - ytop_left) + ; Process 8 rows in each single loop and loop 4 times to process 32 rows. + mov r2, #4 + +loop_32x32_neon + ; Process two rows. + vdup.16 q0, d6[0] + vdup.16 q2, d6[1] + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqshrun.s16 d0, q12, #0 + vqshrun.s16 d1, q13, #0 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqshrun.s16 d2, q14, #0 + vqshrun.s16 d3, q15, #0 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqshrun.s16 d24, q12, #0 + vqshrun.s16 d25, q13, #0 + vqshrun.s16 d26, q14, #0 + vqshrun.s16 d27, q15, #0 + vdup.16 q1, d6[2] + vdup.16 q2, d6[3] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q1, q8 + vadd.s16 q13, q1, q9 + vadd.s16 q14, q1, q10 + vadd.s16 q15, q1, q11 + vqshrun.s16 d0, q12, #0 + vqshrun.s16 d1, q13, #0 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqshrun.s16 d2, q14, #0 + vqshrun.s16 d3, q15, #0 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqshrun.s16 d24, q12, #0 + vqshrun.s16 d25, q13, #0 + vqshrun.s16 d26, q14, #0 + vqshrun.s16 d27, q15, #0 + vdup.16 q0, d7[0] + vdup.16 q2, d7[1] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqshrun.s16 d0, q12, #0 + vqshrun.s16 d1, q13, #0 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqshrun.s16 d2, q14, #0 + vqshrun.s16 d3, q15, #0 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqshrun.s16 d24, q12, #0 + vqshrun.s16 d25, q13, #0 + vqshrun.s16 d26, q14, #0 + vqshrun.s16 d27, q15, #0 + vdup.16 q0, d7[2] + vdup.16 q2, d7[3] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqshrun.s16 d0, q12, #0 + vqshrun.s16 d1, q13, #0 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqshrun.s16 d2, q14, #0 + vqshrun.s16 d3, q15, #0 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqshrun.s16 d24, q12, #0 + vqshrun.s16 d25, q13, #0 + vld1.8 d0, [r3]! ; preload 8 left pixels + vqshrun.s16 d26, q14, #0 + vqshrun.s16 d27, q15, #0 + vmovl.u8 q3, d0 + vst1.64 {d24-d27}, [r0], r1 + + subs r2, r2, #1 + bgt loop_32x32_neon + + bx lr + ENDP ; |vp9_tm_predictor_32x32_neon| + END diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h index e9c698119..991d3c2b3 100644 --- a/vp9/common/mips/dspr2/vp9_common_dspr2.h +++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h @@ -17,6 +17,10 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" +#ifdef __cplusplus +extern "C" { +#endif + #if HAVE_DSPR2 #define CROP_WIDTH 512 extern uint8_t *vp9_ff_cropTbl; @@ -114,4 +118,8 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, int w, int h); #endif // #if HAVE_DSPR2 +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_MIPS_DSPR2_VP9_COMMON_DSPR2_H_ diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h index 98bfcfaf2..008cf8cac 100644 --- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h +++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h @@ -17,6 +17,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + #if HAVE_DSPR2 /* inputs & outputs are quad-byte vectors */ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, @@ -752,4 +756,8 @@ static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6, *oq6 = res_oq6; } #endif // #if HAVE_DSPR2 +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_ diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h index 4cb2ebb46..ca01a6a10 100644 --- a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h +++ b/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h @@ -17,6 +17,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + #if HAVE_DSPR2 #define STORE_F0() { \ __asm__ __volatile__ ( \ @@ -467,4 +471,8 @@ } #endif // #if HAVE_DSPR2 +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_ diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h index b9e0aca90..5b0d9cc9b 100644 --- a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h +++ b/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h @@ -17,6 +17,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + #if HAVE_DSPR2 /* processing 4 pixels at the same time * compute hev and mask in the same function */ @@ -362,4 +366,8 @@ static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, *flat2 = flat1; } #endif // #if HAVE_DSPR2 +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index ca42090c1..e033fbb99 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -33,8 +33,8 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) { void vp9_free_frame_buffers(VP9_COMMON *cm) { int i; - for (i = 0; i < cm->fb_count; i++) - vp9_free_frame_buffer(&cm->yv12_fb[i]); + for (i = 0; i < FRAME_BUFFERS; i++) + vp9_free_frame_buffer(&cm->frame_bufs[i].buf); vp9_free_frame_buffer(&cm->post_proc_buffer); @@ -85,7 +85,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { int mi_size; if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, - VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0) + VP9_DEC_BORDER_IN_PIXELS) < 0) goto fail; set_mb_mi(cm, aligned_width, aligned_height); @@ -137,33 +137,21 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { const int ss_y = cm->subsampling_y; int mi_size; - if (cm->fb_count == 0) { - cm->fb_count = FRAME_BUFFERS; - CHECK_MEM_ERROR(cm, cm->yv12_fb, - vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb))); - CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt, - vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt))); - if (cm->fb_lru) { - CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru, - vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_lru))); - } - } - vp9_free_frame_buffers(cm); - for (i = 0; i < cm->fb_count; i++) { - cm->fb_idx_ref_cnt[i] = 0; - if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y, - VP9_ENC_BORDER_IN_PIXELS) < 0) + for (i = 0; i < FRAME_BUFFERS; i++) { + cm->frame_bufs[i].ref_count = 0; + if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height, + ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0) goto fail; } - cm->new_fb_idx = cm->fb_count - 1; - cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1; + cm->new_fb_idx = FRAME_BUFFERS - 1; + cm->frame_bufs[cm->new_fb_idx].ref_count = 1; for (i = 0; i < REF_FRAMES; i++) { cm->ref_frame_map[i] = i; - cm->fb_idx_ref_cnt[i] = 1; + cm->frame_bufs[i].ref_count = 1; } if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, @@ -211,14 +199,6 @@ void vp9_create_common(VP9_COMMON *cm) { void vp9_remove_common(VP9_COMMON *cm) { vp9_free_frame_buffers(cm); - - vpx_free(cm->yv12_fb); - vpx_free(cm->fb_idx_ref_cnt); - vpx_free(cm->fb_idx_ref_lru); - - cm->yv12_fb = NULL; - cm->fb_idx_ref_cnt = NULL; - cm->fb_idx_ref_lru = NULL; } void vp9_initialize_common() { diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index cf8dca573..e3b5b95d8 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -14,6 +14,10 @@ #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_initialize_common(); void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi); @@ -28,4 +32,8 @@ void vp9_free_frame_buffers(VP9_COMMON *cm); void vp9_update_frame_size(VP9_COMMON *cm); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index ad78b0dc4..49e336aa4 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -25,8 +25,12 @@ #include "vp9/common/vp9_scale.h" #include "vp9/common/vp9_seg_common.h" +#ifdef __cplusplus +extern "C" { +#endif + #define BLOCK_SIZE_GROUPS 4 -#define MBSKIP_CONTEXTS 3 +#define SKIP_CONTEXTS 3 #define INTER_MODE_CONTEXTS 7 /* Segment Feature Masks */ @@ -131,7 +135,7 @@ typedef struct { // Flags used for prediction status of various bit-stream signals unsigned char seg_id_predicted; - INTERPOLATION_TYPE interp_filter; + INTERP_FILTER interp_filter; BLOCK_SIZE sb_type; } MB_MODE_INFO; @@ -248,7 +252,7 @@ typedef struct macroblockd { /* Inverse transform function pointers. */ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); - struct subpix_fn_table subpix; + const interp_kernel *interp_kernel; int corrupted; @@ -463,4 +467,8 @@ static int get_tx_eob(const struct segmentation *seg, int segment_id, return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max; } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index 36d1cdf14..69964dae8 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -19,6 +19,10 @@ #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) @@ -91,4 +95,8 @@ static int get_unsigned_bits(unsigned int num_values) { #define VP9_FRAME_MARKER 0x2 +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h index 5222d29c1..f41962747 100644 --- a/vp9/common/vp9_common_data.h +++ b/vp9/common/vp9_common_data.h @@ -13,6 +13,10 @@ #include "vp9/common/vp9_enums.h" +#ifdef __cplusplus +extern "C" { +#endif + extern const int b_width_log2_lookup[BLOCK_SIZES]; extern const int b_height_log2_lookup[BLOCK_SIZES]; extern const int mi_width_log2_lookup[BLOCK_SIZES]; @@ -28,4 +32,8 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index 6edf7eaca..b105a57bc 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -20,7 +20,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *x_filters, + const interp_kernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -42,7 +42,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *x_filters, + const interp_kernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -65,7 +65,7 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *y_filters, + const interp_kernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); @@ -88,7 +88,7 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *y_filters, + const interp_kernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); @@ -112,9 +112,9 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, - const subpel_kernel *const x_filters, + const interp_kernel *const x_filters, int x0_q4, int x_step_q4, - const subpel_kernel *const y_filters, + const interp_kernel *const y_filters, int y0_q4, int y_step_q4, int w, int h) { // Fixed size intermediate buffer places limits on parameters. @@ -138,14 +138,14 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, y_filters, y0_q4, y_step_q4, w, h); } -static const subpel_kernel *get_filter_base(const int16_t *filter) { +static const interp_kernel *get_filter_base(const int16_t *filter) { // NOTE: This assumes that the filter table is 256-byte aligned. // TODO(agrange) Modify to make independent of table alignment. - return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); + return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); } -static int get_filter_offset(const int16_t *f, const subpel_kernel *base) { - return (const subpel_kernel *)(intptr_t)f - base; +static int get_filter_offset(const int16_t *f, const interp_kernel *base) { + return (const interp_kernel *)(intptr_t)f - base; } void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, @@ -153,7 +153,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); convolve_horiz(src, src_stride, dst, dst_stride, filters_x, @@ -165,7 +165,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, @@ -177,7 +177,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); @@ -188,7 +188,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4, w, h); @@ -199,10 +199,10 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h) { - const subpel_kernel *const filters_x = get_filter_base(filter_x); + const interp_kernel *const filters_x = get_filter_base(filter_x); const int x0_q4 = get_filter_offset(filter_x, filters_x); - const subpel_kernel *const filters_y = get_filter_base(filter_y); + const interp_kernel *const filters_y = get_filter_base(filter_y); const int y0_q4 = get_filter_offset(filter_y, filters_y); convolve(src, src_stride, dst, dst_stride, diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index 29d499063..6bf71fc79 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -13,10 +13,18 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_CONVOLVE_H_ diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index ba162fd20..e030d92ec 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -18,6 +18,10 @@ #include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_entropymode.h" +#ifdef __cplusplus +extern "C" { +#endif + #define DIFF_UPDATE_PROB 252 // Coefficient token alphabet @@ -184,4 +188,8 @@ static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, } } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 83281b2ea..6def3c869 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -303,7 +303,7 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; } -static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = { +static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 }; @@ -325,7 +325,7 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) { vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p); vp9_copy(cm->fc.single_ref_prob, default_single_ref_p); cm->fc.tx_probs = default_tx_probs; - vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs); + vp9_copy(cm->fc.skip_probs, default_skip_probs); vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs); } @@ -385,7 +385,7 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i], counts->partition[i], fc->partition_prob[i]); - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i], counts->switchable_interp[i], fc->switchable_interp_prob[i]); @@ -415,9 +415,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { } } - for (i = 0; i < MBSKIP_CONTEXTS; ++i) - fc->mbskip_probs[i] = adapt_prob(pre_fc->mbskip_probs[i], - counts->mbskip[i]); + for (i = 0; i < SKIP_CONTEXTS; ++i) + fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]); } static void set_default_lf_deltas(struct loopfilter *lf) { diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 5312553c7..deec3f652 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -13,12 +13,14 @@ #include "vp9/common/vp9_blockd.h" +#ifdef __cplusplus +extern "C" { +#endif + #define TX_SIZE_CONTEXTS 2 #define SWITCHABLE_FILTERS 3 // number of switchable filters #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) -// #define MODE_STATS - struct VP9Common; struct tx_probs { @@ -57,4 +59,8 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 48cb82db1..7e1f1479b 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -15,6 +15,10 @@ #include "./vpx_config.h" #include "vp9/common/vp9_blockd.h" +#ifdef __cplusplus +extern "C" { +#endif + struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); @@ -121,4 +125,8 @@ typedef struct { void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index 34411a34f..e96e76947 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -13,6 +13,10 @@ #include "./vpx_config.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MI_SIZE_LOG2 3 #define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 @@ -90,4 +94,8 @@ typedef enum { SRGB = 7 // RGB } COLOR_SPACE; +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 79ace147c..dbde6d551 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_filter.h" -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_bilinear_filters[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, @@ -35,7 +35,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // Lagrangian interpolation filter -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, { 0, 1, -5, 126, 8, -3, 1, 0}, @@ -56,7 +56,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // DCT based filter -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = { {0, 0, 0, 128, 0, 0, 0, 0}, {-1, 3, -7, 127, 8, -3, 1, 0}, @@ -77,7 +77,7 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; // freqmultiplier = 0.5 -DECLARE_ALIGNED(256, const subpel_kernel, +DECLARE_ALIGNED(256, const interp_kernel, vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0}, {-3, -1, 32, 64, 38, 1, -3, 0}, @@ -98,14 +98,15 @@ DECLARE_ALIGNED(256, const subpel_kernel, }; -static const subpel_kernel* vp9_filter_kernels[4] = { +static const interp_kernel* vp9_filter_kernels[4] = { vp9_sub_pel_filters_8, vp9_sub_pel_filters_8lp, vp9_sub_pel_filters_8s, vp9_bilinear_filters }; -const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type) { - return vp9_filter_kernels[type]; +const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter) { + assert(filter != SWITCHABLE); + return vp9_filter_kernels[filter]; } diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index b1e7e6499..b611e304c 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -14,6 +14,10 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + #define FILTER_BITS 7 #define SUBPEL_BITS 4 @@ -27,25 +31,24 @@ typedef enum { EIGHTTAP_SHARP = 2, BILINEAR = 3, SWITCHABLE = 4 /* should be the last one */ -} INTERPOLATION_TYPE; - -typedef int16_t subpel_kernel[SUBPEL_TAPS]; +} INTERP_FILTER; -struct subpix_fn_table { - const subpel_kernel *filter_x; - const subpel_kernel *filter_y; -}; +typedef int16_t interp_kernel[SUBPEL_TAPS]; -const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type); +const interp_kernel *vp9_get_interp_kernel(INTERP_FILTER filter); -extern const subpel_kernel vp9_bilinear_filters[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]; -extern const subpel_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_bilinear_filters[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]; +extern const interp_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]; // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear // filter kernel as a 2 tap filter. #define BILINEAR_FILTERS_2TAP(x) \ (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1) +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 533f7f361..20b78bfed 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -96,7 +96,7 @@ void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { } } -static void idct4_1d(const int16_t *input, int16_t *output) { +static void idct4(const int16_t *input, int16_t *output) { int16_t step[4]; int temp1, temp2; // stage 1 @@ -124,7 +124,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { // Rows for (i = 0; i < 4; ++i) { - idct4_1d(input, outptr); + idct4(input, outptr); input += 4; outptr += 4; } @@ -133,7 +133,7 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; - idct4_1d(temp_in, temp_out); + idct4(temp_in, temp_out); for (j = 0; j < 4; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * stride + i]); @@ -156,7 +156,7 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { } } -static void idct8_1d(const int16_t *input, int16_t *output) { +static void idct8(const int16_t *input, int16_t *output) { int16_t step1[8], step2[8]; int temp1, temp2; // stage 1 @@ -174,7 +174,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) { step1[6] = dct_const_round_shift(temp2); // stage 2 & stage 3 - even half - idct4_1d(step1, step1); + idct4(step1, step1); // stage 2 - odd half step2[4] = step1[4] + step1[5]; @@ -209,7 +209,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { // First transform rows for (i = 0; i < 8; ++i) { - idct8_1d(input, outptr); + idct8(input, outptr); input += 8; outptr += 8; } @@ -218,7 +218,7 @@ void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - idct8_1d(temp_in, temp_out); + idct8(temp_in, temp_out); for (j = 0; j < 8; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i]); @@ -238,7 +238,7 @@ void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { } } -static void iadst4_1d(const int16_t *input, int16_t *output) { +static void iadst4(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0 = input[0]; @@ -283,10 +283,10 @@ static void iadst4_1d(const int16_t *input, int16_t *output) { void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, int tx_type) { const transform_2d IHT_4[] = { - { idct4_1d, idct4_1d }, // DCT_DCT = 0 - { iadst4_1d, idct4_1d }, // ADST_DCT = 1 - { idct4_1d, iadst4_1d }, // DCT_ADST = 2 - { iadst4_1d, iadst4_1d } // ADST_ADST = 3 + { idct4, idct4 }, // DCT_DCT = 0 + { iadst4, idct4 }, // ADST_DCT = 1 + { idct4, iadst4 }, // DCT_ADST = 2 + { iadst4, iadst4 } // ADST_ADST = 3 }; int i, j; @@ -311,7 +311,7 @@ void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, + dest[j * stride + i]); } } -static void iadst8_1d(const int16_t *input, int16_t *output) { +static void iadst8(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0 = input[7]; @@ -389,10 +389,10 @@ static void iadst8_1d(const int16_t *input, int16_t *output) { } static const transform_2d IHT_8[] = { - { idct8_1d, idct8_1d }, // DCT_DCT = 0 - { iadst8_1d, idct8_1d }, // ADST_DCT = 1 - { idct8_1d, iadst8_1d }, // DCT_ADST = 2 - { iadst8_1d, iadst8_1d } // ADST_ADST = 3 + { idct8, idct8 }, // DCT_DCT = 0 + { iadst8, idct8 }, // ADST_DCT = 1 + { idct8, iadst8 }, // DCT_ADST = 2 + { iadst8, iadst8 } // ADST_ADST = 3 }; void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, @@ -430,7 +430,7 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { // First transform rows // only first 4 row has non-zero coefs for (i = 0; i < 4; ++i) { - idct8_1d(input, outptr); + idct8(input, outptr); input += 8; outptr += 8; } @@ -439,14 +439,14 @@ void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; - idct8_1d(temp_in, temp_out); + idct8(temp_in, temp_out); for (j = 0; j < 8; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i]); } } -static void idct16_1d(const int16_t *input, int16_t *output) { +static void idct16(const int16_t *input, int16_t *output) { int16_t step1[16], step2[16]; int temp1, temp2; @@ -619,7 +619,7 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { // First transform rows for (i = 0; i < 16; ++i) { - idct16_1d(input, outptr); + idct16(input, outptr); input += 16; outptr += 16; } @@ -628,14 +628,14 @@ void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; - idct16_1d(temp_in, temp_out); + idct16(temp_in, temp_out); for (j = 0; j < 16; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * stride + i]); } } -static void iadst16_1d(const int16_t *input, int16_t *output) { +static void iadst16(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; int x0 = input[15]; @@ -807,10 +807,10 @@ static void iadst16_1d(const int16_t *input, int16_t *output) { } static const transform_2d IHT_16[] = { - { idct16_1d, idct16_1d }, // DCT_DCT = 0 - { iadst16_1d, idct16_1d }, // ADST_DCT = 1 - { idct16_1d, iadst16_1d }, // DCT_ADST = 2 - { iadst16_1d, iadst16_1d } // ADST_ADST = 3 + { idct16, idct16 }, // DCT_DCT = 0 + { iadst16, idct16 }, // ADST_DCT = 1 + { idct16, iadst16 }, // DCT_ADST = 2 + { iadst16, iadst16 } // ADST_ADST = 3 }; void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, @@ -848,7 +848,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. for (i = 0; i < 4; ++i) { - idct16_1d(input, outptr); + idct16(input, outptr); input += 16; outptr += 16; } @@ -857,7 +857,7 @@ void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j*16 + i]; - idct16_1d(temp_in, temp_out); + idct16(temp_in, temp_out); for (j = 0; j < 16; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * stride + i]); @@ -877,7 +877,7 @@ void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { } } -static void idct32_1d(const int16_t *input, int16_t *output) { +static void idct32(const int16_t *input, int16_t *output) { int16_t step1[32], step2[32]; int temp1, temp2; @@ -1263,7 +1263,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; if (zero_coeff[0] | zero_coeff[1]) - idct32_1d(input, outptr); + idct32(input, outptr); else vpx_memset(outptr, 0, sizeof(int16_t) * 32); input += 32; @@ -1274,7 +1274,7 @@ void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; - idct32_1d(temp_in, temp_out); + idct32(temp_in, temp_out); for (j = 0; j < 32; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * stride + i]); @@ -1290,7 +1290,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) { // Rows // only upper-left 8x8 has non-zero coeff for (i = 0; i < 8; ++i) { - idct32_1d(input, outptr); + idct32(input, outptr); input += 32; outptr += 32; } @@ -1299,7 +1299,7 @@ void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) { for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; - idct32_1d(temp_in, temp_out); + idct32(temp_in, temp_out); for (j = 0; j < 32; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * stride + i]); diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 183c50abf..ceca7951b 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -18,6 +18,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" +#ifdef __cplusplus +extern "C" { +#endif + // Constants and Macros used by all idct/dct functions #define DCT_CONST_BITS 14 @@ -103,4 +107,8 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, int stride, int eob); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 2266e0ec2..dd304c909 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -16,26 +16,6 @@ #include "vp9/common/vp9_seg_common.h" -// This structure holds bit masks for all 8x8 blocks in a 64x64 region. -// Each 1 bit represents a position in which we want to apply the loop filter. -// Left_ entries refer to whether we apply a filter on the border to the -// left of the block. Above_ entries refer to whether or not to apply a -// filter on the above border. Int_ entries refer to whether or not to -// apply borders on the 4x4 edges within the 8x8 block that each bit -// represents. -// Since each transform is accompanied by a potentially different type of -// loop filter there is a different entry in the array for each transform size. -typedef struct { - uint64_t left_y[TX_SIZES]; - uint64_t above_y[TX_SIZES]; - uint64_t int_4x4_y; - uint16_t left_uv[TX_SIZES]; - uint16_t above_uv[TX_SIZES]; - uint16_t int_4x4_uv; - uint8_t lfl_y[64]; - uint8_t lfl_uv[16]; -} LOOP_FILTER_MASK; - // 64 bit masks for left transform size. Each 1 represents a position where // we should apply a loop filter across the left border of an 8x8 block // boundary. @@ -638,9 +618,9 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. -static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, - LOOP_FILTER_MASK *lfm) { +void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, + MODE_INFO **mi_8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; MODE_INFO **mip = mi_8x8; @@ -1069,10 +1049,10 @@ static void filter_block_plane_non420(VP9_COMMON *cm, } #endif -static void filter_block_plane(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { +void vp9_filter_block_plane(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; int r, c; @@ -1244,14 +1224,14 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, #if CONFIG_NON420 if (use_420) #endif - setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, + cm->mode_info_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { #if CONFIG_NON420 if (use_420) #endif - filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); + vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); #if CONFIG_NON420 else filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col, diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 98fac96ff..668e898cf 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -17,6 +17,10 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_seg_common.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MAX_LOOP_FILTER 63 #define MAX_SHARPNESS 7 @@ -56,9 +60,42 @@ typedef struct { uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; } loop_filter_info_n; +// This structure holds bit masks for all 8x8 blocks in a 64x64 region. +// Each 1 bit represents a position in which we want to apply the loop filter. +// Left_ entries refer to whether we apply a filter on the border to the +// left of the block. Above_ entries refer to whether or not to apply a +// filter on the above border. Int_ entries refer to whether or not to +// apply borders on the 4x4 edges within the 8x8 block that each bit +// represents. +// Since each transform is accompanied by a potentially different type of +// loop filter there is a different entry in the array for each transform size. +typedef struct { + uint64_t left_y[TX_SIZES]; + uint64_t above_y[TX_SIZES]; + uint64_t int_4x4_y; + uint16_t left_uv[TX_SIZES]; + uint16_t above_uv[TX_SIZES]; + uint16_t int_4x4_uv; + uint8_t lfl_y[64]; + uint8_t lfl_uv[16]; +} LOOP_FILTER_MASK; + /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; struct macroblockd; +struct VP9LfSyncData; + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. +void vp9_setup_mask(struct VP9Common *const cm, + const int mi_row, const int mi_col, + MODE_INFO **mi_8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); void vp9_loop_filter_init(struct VP9Common *cm); @@ -86,8 +123,15 @@ typedef struct LoopFilterWorkerData { int start; int stop; int y_only; + + struct VP9LfSyncData *lf_sync; + int num_lf_workers; } LFWorkerData; // Operates on the rows described by LFWorkerData passed as 'arg1'. int vp9_loop_filter_worker(void *arg1, void *arg2); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h index 155c3f12e..98fd1d82f 100644 --- a/vp9/common/vp9_mv.h +++ b/vp9/common/vp9_mv.h @@ -15,6 +15,10 @@ #include "vp9/common/vp9_common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mv { int16_t row; int16_t col; @@ -36,4 +40,8 @@ static void clamp_mv(MV *mv, int min_col, int max_col, mv->row = clamp(mv->row, min_row, max_row); } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_MV_H_ diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index cd89390d5..0936abfcd 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h @@ -7,12 +7,16 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ +#define VP9_COMMON_VP9_MVREF_COMMON_H_ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" -#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ -#define VP9_COMMON_VP9_MVREF_COMMON_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, @@ -56,4 +60,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest, int_mv *near); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 45d798482..564e4195f 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -11,17 +11,16 @@ #ifndef VP9_COMMON_VP9_ONYX_H_ #define VP9_COMMON_VP9_ONYX_H_ -#ifdef __cplusplus -extern "C" -{ // NOLINT -#endif - #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MAX_SEGMENTS 8 typedef int *VP9_PTR; @@ -56,6 +55,7 @@ extern "C" MODE_FIRSTPASS = 0x3, MODE_SECONDPASS = 0x4, MODE_SECONDPASS_BEST = 0x5, + MODE_REALTIME = 0x6, } MODE; typedef enum { @@ -237,7 +237,7 @@ extern "C" int vp9_get_quantizer(VP9_PTR c); #ifdef __cplusplus -} +} // extern "C" #endif #endif // VP9_COMMON_VP9_ONYX_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index f6fe4d3f1..d92a25b12 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -25,6 +25,10 @@ #include "vp9/common/vp9_postproc.h" #endif +#ifdef __cplusplus +extern "C" { +#endif + #define REFS_PER_FRAME 3 #define REF_FRAMES_LOG2 3 @@ -56,7 +60,7 @@ typedef struct frame_contexts { vp9_prob single_ref_prob[REF_CONTEXTS][2]; vp9_prob comp_ref_prob[REF_CONTEXTS]; struct tx_probs tx_probs; - vp9_prob mbskip_probs[MBSKIP_CONTEXTS]; + vp9_prob skip_probs[SKIP_CONTEXTS]; nmv_context nmvc; } FRAME_CONTEXT; @@ -75,7 +79,7 @@ typedef struct { unsigned int single_ref[REF_CONTEXTS][2][2]; unsigned int comp_ref[REF_CONTEXTS][2]; struct tx_counts tx; - unsigned int mbskip[MBSKIP_CONTEXTS][2]; + unsigned int skip[SKIP_CONTEXTS][2]; nmv_context_counts mv; } FRAME_COUNTS; @@ -87,6 +91,12 @@ typedef enum { REFERENCE_MODES = 3, } REFERENCE_MODE; + +typedef struct { + int ref_count; + YV12_BUFFER_CONFIG buf; +} RefCntBuffer; + typedef struct VP9Common { struct vpx_internal_error_info error; @@ -113,8 +123,8 @@ typedef struct VP9Common { YV12_BUFFER_CONFIG *frame_to_show; - YV12_BUFFER_CONFIG *yv12_fb; - int *fb_idx_ref_cnt; /* reference counts */ + RefCntBuffer frame_bufs[FRAME_BUFFERS]; + int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and @@ -180,7 +190,7 @@ typedef struct VP9Common { // Persistent mb segment id map used in prediction. unsigned char *last_frame_seg_map; - INTERPOLATION_TYPE mcomp_filter_type; + INTERP_FILTER interp_filter; loop_filter_info_n lf_info; @@ -213,55 +223,32 @@ typedef struct VP9Common { int frame_parallel_decoding_mode; int log2_tile_cols, log2_tile_rows; - - vpx_codec_frame_buffer_t *fb_list; // External frame buffers - int fb_count; // Total number of frame buffers - vpx_realloc_frame_buffer_cb_fn_t realloc_fb_cb; - void *user_priv; // Private data associated with the external frame buffers. - - int fb_lru; // Flag telling if lru is on/off - uint32_t *fb_idx_ref_lru; // Frame buffer lru cache - uint32_t fb_idx_ref_lru_count; } VP9_COMMON; static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { - return &cm->yv12_fb[cm->new_fb_idx]; + return &cm->frame_bufs[cm->new_fb_idx].buf; } static int get_free_fb(VP9_COMMON *cm) { int i; - uint32_t lru_count = cm->fb_idx_ref_lru_count + 1; - int free_buffer_idx = cm->fb_count; - for (i = 0; i < cm->fb_count; i++) { - if (!cm->fb_lru) { - if (cm->fb_idx_ref_cnt[i] == 0) { - free_buffer_idx = i; - break; - } - } else { - if (cm->fb_idx_ref_cnt[i] == 0 && cm->fb_idx_ref_lru[i] < lru_count) { - free_buffer_idx = i; - lru_count = cm->fb_idx_ref_lru[i]; - } - } - } + for (i = 0; i < FRAME_BUFFERS; i++) + if (cm->frame_bufs[i].ref_count == 0) + break; - assert(free_buffer_idx < cm->fb_count); - cm->fb_idx_ref_cnt[free_buffer_idx] = 1; - if (cm->fb_lru) - cm->fb_idx_ref_lru[free_buffer_idx] = ++cm->fb_idx_ref_lru_count; - return free_buffer_idx; + assert(i < FRAME_BUFFERS); + cm->frame_bufs[i].ref_count = 1; + return i; } -static void ref_cnt_fb(int *buf, int *idx, int new_idx) { +static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { const int ref_index = *idx; - if (ref_index >= 0 && buf[ref_index] > 0) - buf[ref_index]--; + if (ref_index >= 0 && bufs[ref_index].ref_count > 0) + bufs[ref_index].ref_count--; *idx = new_idx; - buf[new_idx]++; + bufs[new_idx].ref_count++; } static int mi_cols_aligned_to_sb(int n_mis) { @@ -359,4 +346,8 @@ static INLINE int partition_plane_context( return (left * 2 + above) + bsl * PARTITION_PLOFFSET; } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h index b8a456fdb..b07d5d045 100644 --- a/vp9/common/vp9_postproc.h +++ b/vp9/common/vp9_postproc.h @@ -15,6 +15,10 @@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_ppflags.h" +#ifdef __cplusplus +extern "C" { +#endif + struct postproc_state { int last_q; int last_noise; @@ -33,4 +37,8 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_POSTPROC_H_ diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h index 561c93028..8168935fc 100644 --- a/vp9/common/vp9_ppflags.h +++ b/vp9/common/vp9_ppflags.h @@ -11,6 +11,10 @@ #ifndef VP9_COMMON_VP9_PPFLAGS_H_ #define VP9_COMMON_VP9_PPFLAGS_H_ +#ifdef __cplusplus +extern "C" { +#endif + enum { VP9D_NOFILTERING = 0, VP9D_DEBLOCK = 1 << 0, @@ -35,4 +39,8 @@ typedef struct { int display_mv_flag; } vp9_ppflags_t; +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h index f079161d6..0efc713ca 100644 --- a/vp9/common/vp9_pragmas.h +++ b/vp9/common/vp9_pragmas.h @@ -11,6 +11,10 @@ #ifndef VP9_COMMON_VP9_PRAGMAS_H_ #define VP9_COMMON_VP9_PRAGMAS_H_ +#ifdef __cplusplus +extern "C" { +#endif + #ifdef __INTEL_COMPILER #pragma warning(disable:997 1011 170) #endif @@ -19,4 +23,8 @@ #pragma warning(disable:4799) #endif +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_PRAGMAS_H_ diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h index 23722ba72..0acee32f8 100644 --- a/vp9/common/vp9_pred_common.h +++ b/vp9/common/vp9_pred_common.h @@ -14,6 +14,10 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) { return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL; } @@ -50,7 +54,7 @@ static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { - return cm->fc.mbskip_probs[vp9_get_skip_context(xd)]; + return cm->fc.skip_probs[vp9_get_skip_context(xd)]; } int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); @@ -129,4 +133,8 @@ static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, } } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h index 7a790c542..cc8d8ab38 100644 --- a/vp9/common/vp9_prob.h +++ b/vp9/common/vp9_prob.h @@ -18,6 +18,10 @@ #include "vp9/common/vp9_common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef uint8_t vp9_prob; #define MAX_PROB 255 @@ -109,4 +113,8 @@ static void tree_merge_probs(const vp9_tree_index *tree, DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_PROB_H_ diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h index 83f2fb655..af50e23cd 100644 --- a/vp9/common/vp9_quant_common.h +++ b/vp9/common/vp9_quant_common.h @@ -13,6 +13,10 @@ #include "vp9/common/vp9_blockd.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MINQ 0 #define MAXQ 255 #define QINDEX_RANGE (MAXQ - MINQ + 1) @@ -25,4 +29,8 @@ int16_t vp9_ac_quant(int qindex, int delta); int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index b5a9248c3..d554cc0ed 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -69,13 +69,11 @@ static void inter_predictor(const uint8_t *src, int src_stride, const int subpel_y, const struct scale_factors *sf, int w, int h, int ref, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, int xs, int ys) { sf->predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, - subpix->filter_x[subpel_x], xs, - subpix->filter_y[subpel_y], ys, - w, h); + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); } void vp9_build_inter_predictor(const uint8_t *src, int src_stride, @@ -83,7 +81,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, enum mv_precision precision, int x, int y) { const int is_q4 = precision == MV_PRECISION_Q4; @@ -96,7 +94,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, subpix, sf->x_step_q4, sf->y_step_q4); + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); } static INLINE int round_mv_comp_q4(int value) { @@ -198,7 +196,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + (scaled_mv.col >> SUBPEL_BITS); inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, &xd->subpix, xs, ys); + subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel, + xs, ys); } } @@ -367,7 +366,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, &xd->subpix, xs, ys); + subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys); } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 3cc16d94e..3345d83e8 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -14,7 +14,10 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_onyxc_int.h" -struct subpix_fn_table; +#ifdef __cplusplus +extern "C" { +#endif + void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); @@ -32,7 +35,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg, - const struct subpix_fn_table *subpix, + const interp_kernel *kernel, enum mv_precision precision, int x, int y); @@ -90,10 +93,8 @@ static void setup_pre_planes(MACROBLOCKD *xd, int idx, } } -static void set_scale_factors(VP9_COMMON *cm, MACROBLOCKD *xd, - int ref0, int ref1) { - xd->block_refs[0] = &cm->frame_refs[ref0 >= 0 ? ref0 : 0]; - xd->block_refs[1] = &cm->frame_refs[ref1 >= 0 ? ref1 : 0]; -} +#ifdef __cplusplus +} // extern "C" +#endif #endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index fc916fcf3..800736d30 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -14,9 +14,17 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, TX_SIZE tx_size, int mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index e384032f4..04a40bd58 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -135,7 +135,7 @@ prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const ui specialize vp9_v_predictor_16x16 $sse2_x86inc neon prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_16x16 $sse2_x86inc +specialize vp9_tm_predictor_16x16 $sse2_x86inc neon prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2 @@ -174,7 +174,7 @@ prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const ui specialize vp9_v_predictor_32x32 $sse2_x86inc neon prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_32x32 $sse2_x86_64 +specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" specialize vp9_dc_predictor_32x32 $sse2_x86inc diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h index 55b4d8888..90b0d0bf9 100644 --- a/vp9/common/vp9_scale.h +++ b/vp9/common/vp9_scale.h @@ -14,6 +14,10 @@ #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_convolve.h" +#ifdef __cplusplus +extern "C" { +#endif + #define REF_SCALE_SHIFT 14 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT) #define REF_INVALID_SCALE -1 @@ -46,4 +50,8 @@ static int vp9_is_scaled(const struct scale_factors *sf) { sf->y_scale_fp != REF_NO_SCALE; } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/vp9/common/vp9_scan.h b/vp9/common/vp9_scan.h index efab48bfc..9613b675c 100644 --- a/vp9/common/vp9_scan.h +++ b/vp9/common/vp9_scan.h @@ -17,6 +17,10 @@ #include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_blockd.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MAX_NEIGHBORS 2 void vp9_init_neighbors(); @@ -36,4 +40,8 @@ static INLINE int get_coef_context(const int16_t *neighbors, token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_SCAN_H_ diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h index 8ff54fb73..ff2d66a36 100644 --- a/vp9/common/vp9_seg_common.h +++ b/vp9/common/vp9_seg_common.h @@ -13,6 +13,10 @@ #include "vp9/common/vp9_prob.h" +#ifdef __cplusplus +extern "C" { +#endif + #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 @@ -70,5 +74,9 @@ int vp9_get_segdata(const struct segmentation *seg, extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_SEG_COMMON_H_ diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h index 6f955ab56..ee9a4823b 100644 --- a/vp9/common/vp9_systemdependent.h +++ b/vp9/common/vp9_systemdependent.h @@ -11,6 +11,10 @@ #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ +#ifdef __cplusplus +extern "C" { +#endif + #ifdef _MSC_VER #include <math.h> #define snprintf _snprintf @@ -72,4 +76,8 @@ static INLINE int get_msb(unsigned int n) { struct VP9Common; void vp9_machine_specific_config(struct VP9Common *cm); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ diff --git a/vp9/common/vp9_textblit.h b/vp9/common/vp9_textblit.h index c968628fe..158ec1b37 100644 --- a/vp9/common/vp9_textblit.h +++ b/vp9/common/vp9_textblit.h @@ -11,9 +11,17 @@ #ifndef VP9_COMMON_VP9_TEXTBLIT_H_ #define VP9_COMMON_VP9_TEXTBLIT_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_blit_text(const char *msg, unsigned char *address, int pitch); void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, int pitch); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_TEXTBLIT_H_ diff --git a/vp9/common/vp9_tile_common.h b/vp9/common/vp9_tile_common.h index a09876e4b..a97719e29 100644 --- a/vp9/common/vp9_tile_common.h +++ b/vp9/common/vp9_tile_common.h @@ -11,6 +11,10 @@ #ifndef VP9_COMMON_VP9_TILE_COMMON_H_ #define VP9_COMMON_VP9_TILE_COMMON_H_ +#ifdef __cplusplus +extern "C" { +#endif + struct VP9Common; typedef struct TileInfo { @@ -26,4 +30,8 @@ void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, int *max_log2_tile_cols); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index f95423678..8a2297feb 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -23,6 +23,68 @@ typedef void filter8_1dfunction ( const short *filter ); +#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ +void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + while (w >= 16) { \ + vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \ + dst, dst_stride, \ + h, filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \ + dst, dst_stride, \ + h, filter); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \ + dst, dst_stride, \ + h, filter); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + if (w) { \ + vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h); \ + } \ +} + +#define FUN_CONV_2D(avg, opt) \ +void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \ + \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 7); \ + vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h); \ + } else { \ + vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ + } \ +} + #if HAVE_SSSE3 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_ssse3; @@ -37,201 +99,44 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; -void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Ensure the filter can be compressed to int16_t. */ - if (x_step_q4 == 16 && filter_x[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_h8_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_h8_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_h8_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (y_step_q4 == 16 && filter_y[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (x_step_q4 == 16 && filter_x[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_h8_avg_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_h8_avg_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_h8_avg_ssse3(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (y_step_q4 == 16 && filter_y[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); - - assert(w <= 64); - assert(h <= 64); - if (x_step_q4 == 16 && y_step_q4 == 16) { - vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h + 7); - vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } else { - vp9_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } -} - -void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); - - assert(w <= 64); - assert(h <= 64); - if (x_step_q4 == 16 && y_step_q4 == 16) { - vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h + 7); - vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } else { - vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } -} +// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, + ssse3); + +// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, ssse3); +FUN_CONV_2D(avg_ , ssse3); #endif #if HAVE_SSE2 @@ -248,199 +153,41 @@ filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2; filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2; filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2; -void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Ensure the filter can be compressed to int16_t. */ - if (x_step_q4 == 16 && filter_x[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_h8_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_h8_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_h8_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (y_step_q4 == 16 && filter_y[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (x_step_q4 == 16 && filter_x[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_h8_avg_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_h8_avg_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_h8_avg_sse2(src, src_stride, - dst, dst_stride, - h, filter_x); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - if (y_step_q4 == 16 && filter_y[3] != 128) { - while (w >= 16) { - vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 16; - dst += 16; - w -= 16; - } - while (w >= 8) { - vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 8; - dst += 8; - w -= 8; - } - while (w >= 4) { - vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride, - dst, dst_stride, - h, filter_y); - src += 4; - dst += 4; - w -= 4; - } - } - if (w) { - vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } -} - -void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); - - assert(w <= 64); - assert(h <= 64); - if (x_step_q4 == 16 && y_step_q4 == 16) { - vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h + 7); - vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } else { - vp9_convolve8_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } -} - -void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); - - assert(w <= 64); - assert(h <= 64); - if (x_step_q4 == 16 && y_step_q4 == 16) { - vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h + 7); - vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); - } else { - vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - } -} +// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); + +// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, sse2); +FUN_CONV_2D(avg_ , sse2); #endif diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 2f6149464..13a5b5a82 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -180,7 +180,7 @@ static INLINE void transpose_4x4(__m128i *res) { res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); } -static void idct4_1d_sse2(__m128i *in) { +static void idct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); @@ -216,7 +216,7 @@ static void idct4_1d_sse2(__m128i *in) { in[1] = _mm_shuffle_epi32(in[1], 0x4E); } -static void iadst4_1d_sse2(__m128i *in) { +static void iadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); @@ -276,20 +276,20 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, switch (tx_type) { case 0: // DCT_DCT - idct4_1d_sse2(in); - idct4_1d_sse2(in); + idct4_sse2(in); + idct4_sse2(in); break; case 1: // ADST_DCT - idct4_1d_sse2(in); - iadst4_1d_sse2(in); + idct4_sse2(in); + iadst4_sse2(in); break; case 2: // DCT_ADST - iadst4_1d_sse2(in); - idct4_1d_sse2(in); + iadst4_sse2(in); + idct4_sse2(in); break; case 3: // ADST_ADST - iadst4_1d_sse2(in); - iadst4_1d_sse2(in); + iadst4_sse2(in); + iadst4_sse2(in); break; default: assert(0); @@ -455,7 +455,7 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride, res1 = _mm_packs_epi32(tmp2, tmp3); \ } -#define IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \ +#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ out0, out1, out2, out3, out4, out5, out6, out7) \ { \ /* Stage1 */ \ @@ -573,7 +573,7 @@ void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { in0, in1, in2, in3, in4, in5, in6, in7); // 4-stage 1D idct8x8 - IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); } @@ -674,7 +674,7 @@ static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); } -static void idct8_1d_sse2(__m128i *in) { +static void idct8_sse2(__m128i *in) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); @@ -695,11 +695,11 @@ static void idct8_1d_sse2(__m128i *in) { in0, in1, in2, in3, in4, in5, in6, in7); // 4-stage 1D idct8x8 - IDCT8_1D(in0, in1, in2, in3, in4, in5, in6, in7, + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); } -static void iadst8_1d_sse2(__m128i *in) { +static void iadst8_sse2(__m128i *in) { const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); @@ -946,20 +946,20 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride, switch (tx_type) { case 0: // DCT_DCT - idct8_1d_sse2(in); - idct8_1d_sse2(in); + idct8_sse2(in); + idct8_sse2(in); break; case 1: // ADST_DCT - idct8_1d_sse2(in); - iadst8_1d_sse2(in); + idct8_sse2(in); + iadst8_sse2(in); break; case 2: // DCT_ADST - iadst8_1d_sse2(in); - idct8_1d_sse2(in); + iadst8_sse2(in); + idct8_sse2(in); break; case 3: // ADST_ADST - iadst8_1d_sse2(in); - iadst8_1d_sse2(in); + iadst8_sse2(in); + iadst8_sse2(in); break; default: assert(0); @@ -1104,7 +1104,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) - IDCT8_1D(in0, in1, in2, in3, zero, zero, zero, zero, + IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, in0, in1, in2, in3, in4, in5, in6, in7); // Final rounding and shift in0 = _mm_adds_epi16(in0, final_rounding); @@ -1135,7 +1135,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE(dest, in7); } -#define IDCT16_1D \ +#define IDCT16 \ /* Stage2 */ \ { \ const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ @@ -1264,7 +1264,7 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { stp2_10, stp2_13, stp2_11, stp2_12) \ } -#define IDCT16_10_1D \ +#define IDCT16_10 \ /* Stage2 */ \ { \ const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ @@ -1437,7 +1437,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, array_transpose_8x8(in, in); array_transpose_8x8(in+8, in+8); - IDCT16_1D + IDCT16 // Stage7 curr1[0] = _mm_add_epi16(stp2_0, stp1_15); @@ -1465,7 +1465,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, array_transpose_8x8(l+i*8, in); array_transpose_8x8(r+i*8, in+8); - IDCT16_1D + IDCT16 // 2-D in[0] = _mm_add_epi16(stp2_0, stp1_15); @@ -1590,7 +1590,7 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { res0[15] = tbuf[7]; } -static void iadst16_1d_8col(__m128i *in) { +static void iadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2060,7 +2060,7 @@ static void iadst16_1d_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -static void idct16_1d_8col(__m128i *in) { +static void idct16_8col(__m128i *in) { const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); @@ -2404,16 +2404,16 @@ static void idct16_1d_8col(__m128i *in) { in[15] = _mm_sub_epi16(s[0], s[15]); } -static void idct16_1d_sse2(__m128i *in0, __m128i *in1) { +static void idct16_sse2(__m128i *in0, __m128i *in1) { array_transpose_16x16(in0, in1); - idct16_1d_8col(in0); - idct16_1d_8col(in1); + idct16_8col(in0); + idct16_8col(in1); } -static void iadst16_1d_sse2(__m128i *in0, __m128i *in1) { +static void iadst16_sse2(__m128i *in0, __m128i *in1) { array_transpose_16x16(in0, in1); - iadst16_1d_8col(in0); - iadst16_1d_8col(in1); + iadst16_8col(in0); + iadst16_8col(in1); } static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { @@ -2502,20 +2502,20 @@ void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride, switch (tx_type) { case 0: // DCT_DCT - idct16_1d_sse2(in0, in1); - idct16_1d_sse2(in0, in1); + idct16_sse2(in0, in1); + idct16_sse2(in0, in1); break; case 1: // ADST_DCT - idct16_1d_sse2(in0, in1); - iadst16_1d_sse2(in0, in1); + idct16_sse2(in0, in1); + iadst16_sse2(in0, in1); break; case 2: // DCT_ADST - iadst16_1d_sse2(in0, in1); - idct16_1d_sse2(in0, in1); + iadst16_sse2(in0, in1); + idct16_sse2(in0, in1); break; case 3: // ADST_ADST - iadst16_1d_sse2(in0, in1); - iadst16_1d_sse2(in0, in1); + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); break; default: assert(0); @@ -2732,7 +2732,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, for (i = 0; i < 2; i++) { array_transpose_4X8(l + 8*i, in); - IDCT16_10_1D + IDCT16_10 // Stage7 in[0] = _mm_add_epi16(stp2_0, stp1_15); @@ -2814,7 +2814,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, input += 8; \ } \ -#define IDCT32_1D_34 \ +#define IDCT32_34 \ /* Stage1 */ \ { \ const __m128i zero = _mm_setzero_si128();\ @@ -3115,7 +3115,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, } -#define IDCT32_1D \ +#define IDCT32 \ /* Stage1 */ \ { \ const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ @@ -3554,7 +3554,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, array_transpose_8x8(in+16, in+16); array_transpose_8x8(in+24, in+24); - IDCT32_1D + IDCT32 // 1_D: Store 32 intermediate results for each 8x32 block. col[0] = _mm_add_epi16(stp1_0, stp1_31); @@ -3593,7 +3593,7 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, const __m128i zero = _mm_setzero_si128(); // Transpose 32x8 block to 8x32 block array_transpose_8x8(col+i*8, in); - IDCT32_1D_34 + IDCT32_34 // 2_D: Calculate the results and store them to destination. in[0] = _mm_add_epi16(stp1_0, stp1_31); @@ -3922,7 +3922,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, array_transpose_8x8(in+16, in+16); array_transpose_8x8(in+24, in+24); - IDCT32_1D + IDCT32 // 1_D: Store 32 intermediate results for each 8x32 block. col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); @@ -3969,7 +3969,7 @@ void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, array_transpose_8x8(col+j+64, in+16); array_transpose_8x8(col+j+96, in+24); - IDCT32_1D + IDCT32 // 2_D: Calculate the results and store them to destination. in[0] = _mm_add_epi16(stp1_0, stp1_31); diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h index 8870215a2..cab9d34f2 100644 --- a/vp9/common/x86/vp9_postproc_x86.h +++ b/vp9/common/x86/vp9_postproc_x86.h @@ -12,6 +12,10 @@ #ifndef VP9_COMMON_X86_VP9_POSTPROC_X86_H_ #define VP9_COMMON_X86_VP9_POSTPROC_X86_H_ +#ifdef __cplusplus +extern "C" { +#endif + /* Note: * * This platform is commonly built for runtime CPU detection. If you modify @@ -61,4 +65,8 @@ extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt); #endif #endif +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_COMMON_X86_VP9_POSTPROC_X86_H_ |