diff options
Diffstat (limited to 'vp9/common')
-rw-r--r-- | vp9/common/arm/neon/vp9_reconintra_neon_asm.asm | 19 | ||||
-rw-r--r-- | vp9/common/vp9_blockd.h | 10 | ||||
-rw-r--r-- | vp9/common/vp9_onyxc_int.h | 8 | ||||
-rw-r--r-- | vp9/common/vp9_reconintra.c | 118 |
4 files changed, 76 insertions, 79 deletions
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm index d4f6d9b48..14f574a50 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm +++ b/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm @@ -298,8 +298,7 @@ loop_h |vp9_tm_predictor_4x4_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 - ldrb r12, [r12] - vdup.u8 d0, r12 + vld1.u8 {d0[]}, [r12] ; Load above 4 pixels vld1.32 {d2[0]}, [r2] @@ -309,10 +308,10 @@ loop_h ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 @@ -321,10 +320,10 @@ loop_h vst1.32 {d1[0]}, [r0], r1 ; 3rd row and 4th row - ldrb r12, [r3], #1 - ldrb r2, [r3], #1 - vdup.u16 q1, r12 - vdup.u16 q2, r2 + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 319d34832..e53e15da9 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -124,9 +124,12 @@ struct macroblockd_plane { int subsampling_y; struct buf_2d dst; struct buf_2d pre[2]; - const int16_t *dequant; ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *left_context; + int16_t seg_dequant[MAX_SEGMENTS][2]; + + // encoder + const int16_t *dequant; }; #define BLOCK_OFFSET(x, i) ((x) + (i) * 16) @@ -141,7 +144,7 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - + FRAME_COUNTS *counts; int mi_stride; MODE_INFO **mi; @@ -159,6 +162,9 @@ typedef struct macroblockd { int mb_to_top_edge; int mb_to_bottom_edge; + FRAME_CONTEXT *fc; + int frame_parallel_decoding_mode; + /* pointers to reference frames */ RefBuffer *block_refs[2]; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 188b03d41..045d35049 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -341,6 +341,14 @@ static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { xd->plane[i].dqcoeff = xd->dqcoeff; xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (xd->plane[i].plane_type == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode; } xd->above_seg_context = cm->above_seg_context; diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index f7c01fd1d..80034e4db 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -102,6 +102,10 @@ static const uint8_t extend_modes[INTRA_MODES] = { intra_pred_sized(type, 32) #endif // CONFIG_VP9_HIGHBITDEPTH +#define DST(x, y) dst[(x) + (y) * stride] +#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, @@ -112,18 +116,16 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, // First column. for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = AVG2(left[r], left[r + 1]); } dst[(bs - 1) * stride] = left[bs - 1]; dst++; // Second column. for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); } - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -145,11 +147,9 @@ static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); } dst += stride; } @@ -163,9 +163,8 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, (void) bd; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] + - above[r + c + 1] * 2 + - above[r + c + 2], 2) + dst[c] = r + c + 2 < bs * 2 ? AVG3(above[r + c], above[r + c + 1], + above[r + c + 2]) : above[bs * 2 - 1]; } dst += stride; @@ -180,20 +179,19 @@ static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -208,14 +206,13 @@ static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -230,20 +227,19 @@ static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, const uint16_t *left, int bd) { int r, c; (void) bd; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); + dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { @@ -359,10 +355,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, } #endif // CONFIG_VP9_HIGHBITDEPTH -#define DST(x, y) dst[(x) + (y) * stride] -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; @@ -386,16 +378,14 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, (void) above; // first column for (r = 0; r < bs - 1; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1); + dst[r * stride] = AVG2(left[r], left[r + 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // second column for (r = 0; r < bs - 2; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 + - left[r + 2], 2); - dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] + - left[bs - 1] * 3, 2); + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; @@ -418,19 +408,18 @@ void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const int E = above[4]; const int F = above[5]; const int G = above[6]; - const int H = above[7]; (void)left; DST(0, 0) = AVG2(A, B); DST(1, 0) = DST(0, 2) = AVG2(B, C); DST(2, 0) = DST(1, 2) = AVG2(C, D); DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG2(E, F); // differs from vp8 DST(0, 1) = AVG3(A, B, C); DST(1, 1) = DST(0, 3) = AVG3(B, C, D); DST(2, 1) = DST(1, 3) = AVG3(C, D, E); DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 2) = AVG3(E, F, G); - DST(3, 3) = AVG3(F, G, H); + DST(3, 3) = AVG3(E, F, G); // differs from vp8 } static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, @@ -439,11 +428,9 @@ static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, (void) left; for (r = 0; r < bs; ++r) { for (c = 0; c < bs; ++c) - dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1] * 2 + - above[r/2 + c + 2], 2) - : ROUND_POWER_OF_TWO(above[r/2 + c] + - above[r/2 + c + 1], 1); + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); dst += stride; } } @@ -467,7 +454,7 @@ void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = AVG3(G, H, H); + DST(3, 3) = H; // differs from vp8 } static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, @@ -517,20 +504,19 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, // first row for (c = 0; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1); + dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 + - left[r - 1], 2); + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { @@ -565,14 +551,13 @@ void vp9_d135_predictor_4x4(uint8_t *dst, ptrdiff_t stride, static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2); + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; ++r) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst += stride; for (r = 1; r < bs; ++r) { @@ -610,20 +595,19 @@ void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; - dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1); + dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1); + dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; - dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2); - dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2); + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) - dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 + - left[r], 2); + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) - dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2); + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { |