author    John Koleszar <jkoleszar@google.com>  2011-03-15 00:05:07 -0400
committer John Koleszar <jkoleszar@google.com>  2011-03-15 00:05:07 -0400
commit    54c59a03f37193f7139ed4e58b126d8891b01643 (patch)
tree      91c6ad6cec15fe1470bb700d85003d3672380e6e
parent    eeb8c8004e8d9400609e328db1ae7ca652e532cc (diff)
parent    8c48c943e7c082b4330061e74ce85547253de301 (diff)
Merge remote branch 'origin/master' into experimental
Change-Id: Ice13978071e98a88cf8ae5c069c6423d74425dea
-rw-r--r--  build/make/obj_int_extract.c                   16
-rw-r--r--  vp8/decoder/threading.c                        44
-rw-r--r--  vp8/encoder/arm/arm_csystemdependent.c          4
-rw-r--r--  vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm  133
-rw-r--r--  vp8/encoder/arm/variance_arm.h                  4
-rw-r--r--  vp8/vp8cx_arm.mk                                1
-rw-r--r--  vpx/internal/vpx_codec_internal.h               2
-rw-r--r--  vpx_ports/x86_abi_support.asm                  10
8 files changed, 174 insertions(+), 40 deletions(-)
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 01b3129d7..c46d9d58b 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -918,15 +918,23 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(name, ptr, 8);
//log_msg("COFF: Parsing symbol %s\n",name);
- /* +1 to avoid printing leading underscore */
- printf("%-40s EQU ", name + 1);
+ /* The 64bit Windows compiler doesn't prefix with an _.
+ * Check what's there, and bump if necessary
+ */
+ if (name[0] == '_')
+ printf("%-40s EQU ", name + 1);
+ else
+ printf("%-40s EQU ", name);
}
else
{
//log_msg("COFF: Parsing symbol %s\n",
// buf + strtab_ptr + get_le32(ptr+4));
- /* +1 to avoid printing leading underscore */
- printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);
+ if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
+ printf("%-40s EQU ",
+ buf + strtab_ptr + get_le32(ptr + 4) + 1);
+ else
+ printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
}
if (!(strcmp(sectionlist[section-1], ".bss")))
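For clarity, here is a small, self-contained sketch of the symbol-name handling this hunk introduces: 32-bit Windows toolchains decorate C symbols with a leading underscore, the 64-bit compiler does not, so the underscore is stripped only when it is actually present. The helper name print_equ() and the sample symbol names are illustrative, not part of obj_int_extract.c.

#include <stdio.h>

/* Print "<name> EQU " in the same format obj_int_extract uses, skipping a
 * leading underscore only if one is there (32-bit COFF) and leaving the
 * name untouched otherwise (64-bit COFF). */
static void print_equ(const char *name)
{
    if (name[0] == '_')
        name++;
    printf("%-40s EQU ", name);
}

int main(void)
{
    print_equ("_some_symbol");   /* 32-bit style: prints "some_symbol" */
    print_equ("some_symbol");    /* 64-bit style: printed unchanged    */
    return 0;
}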
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 7fc901054..314a8d7fd 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -296,18 +296,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
}
}
- if(pbi->common.filter_level)
- {
- /*update loopfilter info*/
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
- }
-
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
*/
@@ -362,7 +350,16 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
}
}
- /* loopfilter on this macroblock. */
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ * Apply any context driven MB level adjustment
+ */
+ filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
+
+ /* loopfilter on this macroblock. */
if (filter_level)
{
if (mb_col > 0)
@@ -778,18 +775,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
}
}
- if(pbi->common.filter_level)
- {
- /* update loopfilter info */
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
- }
-
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
*/
@@ -853,6 +838,15 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
}
}
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Distance of Mb to the various image edges.
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+ * Apply any context driven MB level adjustment
+ */
+ filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
+
/* loopfilter on this macroblock. */
if (filter_level)
{
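Both hunks in this file move the same few lines, so as a reading aid here is a hedged sketch that gathers the relocated logic in one place. The helper name mt_mb_filter_level() is hypothetical; the fields and vp8_adjust_mb_lf_value() are the ones used in the diff, and the vp8 decoder's internal headers are assumed for the types.

/* Sketch only: pick the baseline loop-filter level for this macroblock's
 * segment, then apply any context-driven MB-level adjustment. This is the
 * block the patch now runs immediately before the loopfilter is applied,
 * instead of earlier in the row loop. */
static int mt_mb_filter_level(VP8D_COMP *pbi, MACROBLOCKD *xd, int alt_flt_enabled)
{
    int Segment = alt_flt_enabled ? xd->mode_info_context->mbmi.segment_id : 0;
    int filter_level = pbi->mt_baseline_filter_level[Segment];

    return vp8_adjust_mb_lf_value(xd, filter_level);
}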
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index b643137a6..5ba14f375 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -50,8 +50,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6;
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
- /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+ cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
+ /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
/*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
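For context, a hedged sketch of the run-time dispatch pattern this hunk plugs into: the encoder keeps a table of function pointers that starts out pointing at the C implementations and is overridden per architecture at init time. The type and struct names below are illustrative, not vp8's actual RTCD definitions.

/* Illustrative only: hypothetical names standing in for vp8's RTCD table. */
typedef unsigned int (*mse16x16_fn_t)(const unsigned char *src_ptr, int source_stride,
                                      const unsigned char *ref_ptr, int recon_stride,
                                      unsigned int *sse);

struct variance_vtable_sketch
{
    mse16x16_fn_t mse16x16;   /* defaults to the C implementation */
};

/* At encoder init on an ARMv6 target, the default is replaced, e.g.:
 *     vtable->mse16x16 = vp8_mse16x16_armv6;
 * and callers always go through the pointer, so they pick up whichever
 * implementation was installed. */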
diff --git a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
new file mode 100644
index 000000000..a9060d76f
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
@@ -0,0 +1,133 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_mse16x16_armv6|
+
+ ARM
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+;
+;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
+; So, we can remove this part of calculation.
+
+|vp8_mse16x16_armv6| PROC
+
+ push {r4-r9, lr}
+ mov r12, #16 ; set loop counter to 16 (=block height)
+
+ mov r4, #0 ; initialize sse = 0
+
+loop
+ ; 1st 4 pixels
+ ldr r5, [r0, #0x0] ; load 4 src pixels
+ ldr r6, [r2, #0x0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r8, r5, r6 ; calculate difference
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ ldr r5, [r0, #0x4] ; load 4 src pixels
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 2nd 4 pixels
+ ldr r6, [r2, #0x4] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+ ldr r5, [r0, #0x8] ; load 4 src pixels
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 3rd 4 pixels
+ ldr r6, [r2, #0x8] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ ldr r5, [r0, #0xc] ; load 4 src pixels
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 4th 4 pixels
+ ldr r6, [r2, #0xc] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ add r2, r2, r3 ; set dst_ptr to next row
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ subs r12, r12, #1 ; next row
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ bne loop
+
+ ; return stuff
+ ldr r1, [sp, #28] ; get address of sse
+ mov r0, r4 ; return sse
+ str r4, [r1] ; store sse
+
+ pop {r4-r9, pc}
+
+ ENDP
+
+ END
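As a reading aid for the assembly above, here is a plain-C sketch of what vp8_mse16x16_armv6 computes: the sum of squared differences over a 16x16 block, returned both through *sse and as the return value. The function name below is illustrative; the arguments mirror the register/stack layout documented at the top of the file.

/* Sketch only: a straightforward C equivalent of the ARMv6 routine. Because
 * mean-squared-error needs no signed sum term, only the squared differences
 * are accumulated, which is exactly the calculation the assembly keeps. */
unsigned int mse16x16_sketch(const unsigned char *src_ptr, int source_stride,
                             const unsigned char *ref_ptr, int recon_stride,
                             unsigned int *sse)
{
    unsigned int total = 0;
    int row, col;

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            int diff = src_ptr[col] - ref_ptr[col];
            total += (unsigned int)(diff * diff);
        }

        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }

    *sse = total;
    return total;
}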
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 7ac0ac08e..7ad7c76d3 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -20,6 +20,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
+extern prototype_variance(vp8_mse16x16_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -32,6 +33,9 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
+#undef vp8_variance_mse16x16
+#define vp8_variance_mse16x16 vp8_mse16x16_armv6
+
#undef vp8_variance_halfpixvar16x16_h
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index f8979f4f7..b07ee8ffb 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -37,6 +37,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
#File list for neon
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 470ea7344..a1ff1921e 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -332,7 +332,7 @@ typedef struct vpx_codec_priv_cb_pair
* extended in one of two ways. First, a second, algorithm specific structure
* can be allocated and the priv member pointed to it. Alternatively, this
* structure can be made the first member of the algorithm specific structure,
- * and the pointer casted to the proper type.
+ * and the pointer cast to the proper type.
*/
struct vpx_codec_priv
{
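The comment being touched here describes the second extension option; as a hedged sketch, the idea looks like the following. struct my_alg_priv and its field are illustrative, not an actual libvpx definition.

/* Sketch only: embed vpx_codec_priv as the first member of the
 * algorithm-specific structure, so a pointer to the common part can be
 * cast to the larger type (and vice versa). */
struct my_alg_priv
{
    struct vpx_codec_priv base;   /* must be the first member */
    int                   alg_specific_state;
};

/* Given a struct vpx_codec_priv *priv known to point into my_alg_priv:
 *     struct my_alg_priv *alg = (struct my_alg_priv *)priv;
 */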
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index a872b280e..be64cd7c7 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -168,15 +168,10 @@
%macro GET_GOT 1
push %1
call %%get_got
- %%sub_offset:
- jmp %%exitGG
%%get_got:
- mov %1, [esp]
- add %1, fake_got - %%sub_offset
- ret
- %%exitGG:
+ pop %1
%undef GLOBAL
- %define GLOBAL(x) x + %1 - fake_got
+ %define GLOBAL(x) x + %1 - %%get_got
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
@@ -289,7 +284,6 @@
%elifidn __OUTPUT_FORMAT__,macho32
%macro SECTION_RODATA 0
section .text
-fake_got:
%endmacro
%else
%define SECTION_RODATA section .rodata