diff options
-rw-r--r-- | third_party/x86inc/x86inc.asm | 6 | ||||
-rw-r--r-- | vp10/common/reconintra.c | 2 | ||||
-rw-r--r-- | vp10/decoder/decodeframe.c | 16 | ||||
-rw-r--r-- | vp10/encoder/bitstream.c | 2 | ||||
-rw-r--r-- | vp10/encoder/encodeframe.c | 20 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 111 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 12 | ||||
-rw-r--r-- | vpx_dsp/x86/subpel_variance_sse2.asm | 12 | ||||
-rw-r--r-- | vpx_ports/x86_abi_support.asm | 2 |
9 files changed, 102 insertions, 81 deletions
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm index be59de311..e7d3fa5eb 100644 --- a/third_party/x86inc/x86inc.asm +++ b/third_party/x86inc/x86inc.asm @@ -119,7 +119,7 @@ %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_SAVE_ARG 1 + %define GET_GOT_DEFINED 1 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ @@ -138,7 +138,7 @@ %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_SAVE_ARG 1 + %define GET_GOT_DEFINED 1 %macro GET_GOT 1 push %1 call %%get_got @@ -149,6 +149,8 @@ %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro + %else + %define GET_GOT_DEFINED 0 %endif %endif diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c index b852a655f..e9e3949ad 100644 --- a/vp10/common/reconintra.c +++ b/vp10/common/reconintra.c @@ -262,7 +262,7 @@ static void vp10_init_intra_predictors_internal(void) { } #if CONFIG_MISC_FIXES -static inline void memset16(uint16_t *dst, int val, int n) { +static INLINE void memset16(uint16_t *dst, int val, int n) { while (n--) *dst++ = val; } diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 80f1778de..70d012b27 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -1152,16 +1152,12 @@ static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd, cm->uv_dc_delta_q = read_delta_q(rb); cm->uv_ac_delta_q = read_delta_q(rb); cm->dequant_bit_depth = cm->bit_depth; - for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) { -#if CONFIG_MISC_FIXES - const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex); -#endif - xd->lossless[i] = cm->y_dc_delta_q == 0 && -#if CONFIG_MISC_FIXES - qindex == 0 && -#else - cm->base_qindex == 0 && -#endif + for (i = 0; i < MAX_SEGMENTS; ++i) { + const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ? + vp10_get_qindex(&cm->seg, i, cm->base_qindex) : + cm->base_qindex; + xd->lossless[i] = qindex == 0 && + cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; } diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 32275d4ae..5f6d9d342 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -1473,7 +1473,7 @@ void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) { assert(n_log2_tiles > 0); vpx_wb_write_literal(&saved_wb, mag, 2); if (mag < 3) - data_sz = remux_tiles(data, data_sz, 1 << n_log2_tiles, mag); + data_sz = remux_tiles(data, (int)data_sz, 1 << n_log2_tiles, mag); } else { assert(n_log2_tiles == 0); } diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index bcdcff3d3..44ca2762e 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -1155,7 +1155,7 @@ static void rd_pick_sb_modes(VP10_COMP *cpi, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { x->source_variance = vp10_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, - bsize, xd->bd); + bsize, xd->bd); } else { x->source_variance = vp10_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); @@ -2579,7 +2579,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP10_COMP *cpi) { } static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) { - if (!cpi->common.seg.enabled && xd->lossless[0]) + if (xd->lossless[0]) return ONLY_4X4; if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32; @@ -2702,16 +2702,12 @@ static void encode_frame_internal(VP10_COMP *cpi) { rdc->m_search_count = 0; // Count of motion search hits. rdc->ex_search_count = 0; // Exhaustive mesh search hits. - for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) { -#if CONFIG_MISC_FIXES - const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex); -#endif - xd->lossless[i] = cm->y_dc_delta_q == 0 && -#if CONFIG_MISC_FIXES - qindex == 0 && -#else - cm->base_qindex == 0 && -#endif + for (i = 0; i < MAX_SEGMENTS; ++i) { + const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ? + vp10_get_qindex(&cm->seg, i, cm->base_qindex) : + cm->base_qindex; + xd->lossless[i] = qindex == 0 && + cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; } diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 7e4c61fbc..015dbc0ca 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -205,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8, for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int src_byte = frame1[byte]; - int pixel_value = *frame2++; - - modifier = src_byte - pixel_value; - // This is an integer approximation of: - // float coeff = (3.0 * modifer * modifier) / pow(2, strength); - // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); - modifier *= modifier; + int pixel_value = *frame2; + int diff_sse[9] = { 0 }; + int idx, idy, index = 0; + + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + int row = i + idy; + int col = j + idx; + + if (row >= 0 && row < (int)block_height && + col >= 0 && col < (int)block_width) { + int diff = frame1[byte + idy * (int)stride + idx] - + frame2[idy * (int)block_width + idx]; + diff_sse[index] = diff * diff; + ++index; + } + } + } + assert(index > 0); + + modifier = 0; + for (idx = 0; idx < 9; ++idx) + modifier += diff_sse[idx]; + modifier *= 3; + modifier /= index; + + ++frame2; modifier += rounding; modifier >>= strength; @@ -406,56 +425,58 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) - vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset, - f->y_stride, - predictor, 16, 16, adj_strength, - filter_weight, - accumulator, count); - vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset, - f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, - adj_strength, - filter_weight, accumulator + 256, - count + 256); - vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset, - f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, - adj_strength, filter_weight, - accumulator + 512, count + 512); + vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset, + f->y_stride, + predictor, 16, 16, adj_strength, + filter_weight, + accumulator, count); + vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, + f->uv_stride, predictor + 256, + mb_uv_width, mb_uv_height, + adj_strength, + filter_weight, accumulator + 256, + count + 256); + vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, + f->uv_stride, predictor + 512, + mb_uv_width, mb_uv_height, + adj_strength, filter_weight, + accumulator + 512, count + 512); } else { // Apply the filter (YUV) - vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, + vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, + predictor, 16, 16, + strength, filter_weight, + accumulator, count); + vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, + f->uv_stride, + predictor + 256, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 256, + count + 256); + vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, + f->uv_stride, + predictor + 512, + mb_uv_width, mb_uv_height, strength, + filter_weight, accumulator + 512, + count + 512); + } +#else + // Apply the filter (YUV) + // TODO(jingning): Need SIMD optimization for this. + vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, strength, filter_weight, accumulator, count); - vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, + vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 256, count + 256); - vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, + vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 512, count + 512); - } -#else - // Apply the filter (YUV) - // TODO(jingning): Need SIMD optimization for this. - vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, - strength, filter_weight, - accumulator, count); - vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 256, - count + 256); - vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + 512, - mb_uv_width, mb_uv_height, strength, - filter_weight, accumulator + 512, - count + 512); #endif // CONFIG_VP9_HIGHBITDEPTH } } diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm index 5f9c963d4..22d52a2af 100644 --- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm +++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm @@ -123,7 +123,10 @@ SECTION .text %define sec_str sec_stridemp ; Store bilin_filter and pw_8 location in stack - GET_GOT eax + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -131,7 +134,6 @@ SECTION .text lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx - RESTORE_GOT ; restore esp LOAD_IF_USED 0, 1 ; load eax, ecx back %else cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ @@ -140,7 +142,10 @@ SECTION .text %define block_height heightd ; Store bilin_filter and pw_8 location in stack - GET_GOT eax + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -148,7 +153,6 @@ SECTION .text lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx - RESTORE_GOT ; restore esp LOAD_IF_USED 0, 1 ; load eax, ecx back %endif %else diff --git a/vpx_dsp/x86/subpel_variance_sse2.asm b/vpx_dsp/x86/subpel_variance_sse2.asm index 1176a2f4c..c655e4b34 100644 --- a/vpx_dsp/x86/subpel_variance_sse2.asm +++ b/vpx_dsp/x86/subpel_variance_sse2.asm @@ -139,7 +139,10 @@ SECTION .text %define sec_str sec_stridemp ;Store bilin_filter and pw_8 location in stack - GET_GOT eax + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -147,7 +150,6 @@ SECTION .text lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx - RESTORE_GOT ; restore esp LOAD_IF_USED 0, 1 ; load eax, ecx back %else cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \ @@ -156,7 +158,10 @@ SECTION .text %define block_height heightd ;Store bilin_filter and pw_8 location in stack - GET_GOT eax + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx @@ -164,7 +169,6 @@ SECTION .text lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx - RESTORE_GOT ; restore esp LOAD_IF_USED 0, 1 ; load eax, ecx back %endif %else diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index c94b76a06..708fa101c 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -189,7 +189,6 @@ %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_SAVE_ARG 1 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ @@ -208,7 +207,6 @@ %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_SAVE_ARG 1 %macro GET_GOT 1 push %1 call %%get_got |