summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- third_party/x86inc/x86inc.asm 6
-rw-r--r-- vp10/common/reconintra.c 2
-rw-r--r-- vp10/decoder/decodeframe.c 16
-rw-r--r-- vp10/encoder/bitstream.c 2
-rw-r--r-- vp10/encoder/encodeframe.c 20
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 111
-rw-r--r-- vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm 12
-rw-r--r-- vpx_dsp/x86/subpel_variance_sse2.asm 12
-rw-r--r-- vpx_ports/x86_abi_support.asm 2
9 files changed, 102 insertions, 81 deletions
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index be59de311..e7d3fa5eb 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -119,7 +119,7 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
- %define GET_GOT_SAVE_ARG 1
+ %define GET_GOT_DEFINED 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -138,7 +138,7 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
- %define GET_GOT_SAVE_ARG 1
+ %define GET_GOT_DEFINED 1
%macro GET_GOT 1
push %1
call %%get_got
@@ -149,6 +149,8 @@
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
+ %else
+ %define GET_GOT_DEFINED 0
%endif
%endif
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index b852a655f..e9e3949ad 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -262,7 +262,7 @@ static void vp10_init_intra_predictors_internal(void) {
}
#if CONFIG_MISC_FIXES
-static inline void memset16(uint16_t *dst, int val, int n) {
+static INLINE void memset16(uint16_t *dst, int val, int n) {
while (n--)
*dst++ = val;
}
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 80f1778de..70d012b27 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -1152,16 +1152,12 @@ static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd,
cm->uv_dc_delta_q = read_delta_q(rb);
cm->uv_ac_delta_q = read_delta_q(rb);
cm->dequant_bit_depth = cm->bit_depth;
- for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
-#if CONFIG_MISC_FIXES
- const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
-#endif
- xd->lossless[i] = cm->y_dc_delta_q == 0 &&
-#if CONFIG_MISC_FIXES
- qindex == 0 &&
-#else
- cm->base_qindex == 0 &&
-#endif
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
+ vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
+ cm->base_qindex;
+ xd->lossless[i] = qindex == 0 &&
+ cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 32275d4ae..5f6d9d342 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1473,7 +1473,7 @@ void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) {
assert(n_log2_tiles > 0);
vpx_wb_write_literal(&saved_wb, mag, 2);
if (mag < 3)
- data_sz = remux_tiles(data, data_sz, 1 << n_log2_tiles, mag);
+ data_sz = remux_tiles(data, (int)data_sz, 1 << n_log2_tiles, mag);
} else {
assert(n_log2_tiles == 0);
}
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index bcdcff3d3..44ca2762e 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1155,7 +1155,7 @@ static void rd_pick_sb_modes(VP10_COMP *cpi,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->source_variance =
vp10_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
- bsize, xd->bd);
+ bsize, xd->bd);
} else {
x->source_variance =
vp10_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
@@ -2579,7 +2579,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP10_COMP *cpi) {
}
static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
- if (!cpi->common.seg.enabled && xd->lossless[0])
+ if (xd->lossless[0])
return ONLY_4X4;
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
return ALLOW_32X32;
@@ -2702,16 +2702,12 @@ static void encode_frame_internal(VP10_COMP *cpi) {
rdc->m_search_count = 0; // Count of motion search hits.
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
- for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
-#if CONFIG_MISC_FIXES
- const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
-#endif
- xd->lossless[i] = cm->y_dc_delta_q == 0 &&
-#if CONFIG_MISC_FIXES
- qindex == 0 &&
-#else
- cm->base_qindex == 0 &&
-#endif
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
+ vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
+ cm->base_qindex;
+ xd->lossless[i] = qindex == 0 &&
+ cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 7e4c61fbc..015dbc0ca 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -205,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
-
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
+ int pixel_value = *frame2;
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
modifier += rounding;
modifier >>= strength;
@@ -406,56 +425,58 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
- vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
- f->y_stride,
- predictor, 16, 16, adj_strength,
- filter_weight,
- accumulator, count);
- vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
- f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height,
- adj_strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
- f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height,
- adj_strength, filter_weight,
- accumulator + 512, count + 512);
+ vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
+ f->y_stride,
+ predictor, 16, 16, adj_strength,
+ filter_weight,
+ accumulator, count);
+ vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height,
+ adj_strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height,
+ adj_strength, filter_weight,
+ accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16,
+ strength, filter_weight,
+ accumulator, count);
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 256,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 512,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 512,
+ count + 512);
+ }
+#else
+ // Apply the filter (YUV)
+ // TODO(jingning): Need SIMD optimization for this.
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
- }
-#else
- // Apply the filter (YUV)
- // TODO(jingning): Need SIMD optimization for this.
- vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16,
- strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 512,
- count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 5f9c963d4..22d52a2af 100644
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -123,7 +123,10 @@ SECTION .text
%define sec_str sec_stridemp
; Store bilin_filter and pw_8 location in stack
- GET_GOT eax
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -131,7 +134,6 @@ SECTION .text
lea ecx, [GLOBAL(pw_8)]
mov g_pw_8m, ecx
- RESTORE_GOT ; restore esp
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
@@ -140,7 +142,10 @@ SECTION .text
%define block_height heightd
; Store bilin_filter and pw_8 location in stack
- GET_GOT eax
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -148,7 +153,6 @@ SECTION .text
lea ecx, [GLOBAL(pw_8)]
mov g_pw_8m, ecx
- RESTORE_GOT ; restore esp
LOAD_IF_USED 0, 1 ; load eax, ecx back
%endif
%else
diff --git a/vpx_dsp/x86/subpel_variance_sse2.asm b/vpx_dsp/x86/subpel_variance_sse2.asm
index 1176a2f4c..c655e4b34 100644
--- a/vpx_dsp/x86/subpel_variance_sse2.asm
+++ b/vpx_dsp/x86/subpel_variance_sse2.asm
@@ -139,7 +139,10 @@ SECTION .text
%define sec_str sec_stridemp
;Store bilin_filter and pw_8 location in stack
- GET_GOT eax
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -147,7 +150,6 @@ SECTION .text
lea ecx, [GLOBAL(pw_8)]
mov g_pw_8m, ecx
- RESTORE_GOT ; restore esp
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
@@ -156,7 +158,10 @@ SECTION .text
%define block_height heightd
;Store bilin_filter and pw_8 location in stack
- GET_GOT eax
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -164,7 +169,6 @@ SECTION .text
lea ecx, [GLOBAL(pw_8)]
mov g_pw_8m, ecx
- RESTORE_GOT ; restore esp
LOAD_IF_USED 0, 1 ; load eax, ecx back
%endif
%else
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index c94b76a06..708fa101c 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -189,7 +189,6 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
- %define GET_GOT_SAVE_ARG 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -208,7 +207,6 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
- %define GET_GOT_SAVE_ARG 1
%macro GET_GOT 1
push %1
call %%get_got