diff options
Diffstat (limited to 'vp8/common/arm/neon/bilinearpredict_neon.c')
-rw-r--r-- | vp8/common/arm/neon/bilinearpredict_neon.c | 108 |
1 files changed, 0 insertions, 108 deletions
diff --git a/vp8/common/arm/neon/bilinearpredict_neon.c b/vp8/common/arm/neon/bilinearpredict_neon.c index 9824a3193..bb6ea76ba 100644 --- a/vp8/common/arm/neon/bilinearpredict_neon.c +++ b/vp8/common/arm/neon/bilinearpredict_neon.c @@ -21,114 +21,6 @@ static const uint8_t bifilter4_coeff[8][2] = { { 16, 112} }; -void vp8_bilinear_predict4x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8; - uint8x16_t q1u8, q2u8; - uint16x8_t q1u16, q2u16; - uint16x8_t q7u16, q8u16, q9u16; - uint64x2_t q4u64, q5u64; - uint64x1_t d12u64; - uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2; - - if (xoffset == 0) { // skip_1stpass_filter - uint32x2_t d28u32 = vdup_n_u32(0); - uint32x2_t d29u32 = vdup_n_u32(0); - uint32x2_t d30u32 = vdup_n_u32(0); - - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0); - src_ptr += src_pixels_per_line; - d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0); - src_ptr += src_pixels_per_line; - d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1); - src_ptr += src_pixels_per_line; - d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0); - d28u8 = vreinterpret_u8_u32(d28u32); - d29u8 = vreinterpret_u8_u32(d29u32); - d30u8 = vreinterpret_u8_u32(d30u32); - } else { - d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d6u8 = vld1_u8(src_ptr); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8); - q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8); - d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8); - - d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)), - vreinterpret_u32_u8(vget_high_u8(q1u8))); - d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)), - vreinterpret_u32_u8(vget_high_u8(q2u8))); - d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)), - vreinterpret_u32_u64(vget_high_u64(q4u64))); - d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), - vreinterpret_u32_u64(vget_high_u64(q5u64))); - - q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8); - q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8); - q9u16 = vmull_u8(d6u8, d0u8); - - q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8); - q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8); - q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8); - - d28u8 = vqrshrn_n_u16(q7u16, 7); - d29u8 = vqrshrn_n_u16(q8u16, 7); - d30u8 = vqrshrn_n_u16(q9u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d28u8, d0u8); - q2u16 = vmull_u8(d29u8, d0u8); - - d26u8 = vext_u8(d28u8, d29u8, 4); - d27u8 = vext_u8(d29u8, d30u8, 4); - - q1u16 = vmlal_u8(q1u16, d26u8, d1u8); - q2u16 = vmlal_u8(q2u16, d27u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0); - dst_ptr += dst_pitch; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1); - } - return; -} - void vp8_bilinear_predict8x4_neon( unsigned char *src_ptr, int src_pixels_per_line, |