author     Jingning Han <jingning@google.com>   2015-07-28 15:57:40 -0700
committer  Jingning Han <jingning@google.com>   2015-07-28 16:06:44 -0700
commit     4b5109cd73946835016550d8ec6cca13995e0ec7 (patch)
tree       0f6472dda691faf7f91324874acc1529c7d16abc /vpx_dsp
parent     a7e9178d8000be0fe884275e8c8c1d4fc29dfc09 (diff)
Replace vp9_ prefix in 2D-DCT functions with vpx_
Clean up the forward 2D-DCT function names in vpx_dsp.

Change-Id: I3117978596d198b690036e7eb05fe429caf3bc25
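This is a pure rename of the forward-DCT entry points; prototypes and behaviour are unchanged. A minimal caller sketch of the before/after, assuming the post-patch prototype from vpx_dsp/fwd_txfm.c below (the wrapper name and the local tran_low_t stand-in are invented for illustration):

#include <stdint.h>

typedef int32_t tran_low_t;  /* stand-in; the real type comes from the library */

/* Prototype as it reads after this patch (see vpx_dsp/fwd_txfm.c below). */
void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride);

static void transform_8x8_block(const int16_t *src, int src_stride,
                                tran_low_t coeff[8 * 8]) {
  /* before this patch: vp9_fdct8x8_c(src, coeff, src_stride); */
  vpx_fdct8x8_c(src, coeff, src_stride);
}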
Diffstat (limited to 'vpx_dsp')
-rw-r--r--  vpx_dsp/arm/fwd_txfm_neon.c            |   4
-rw-r--r--  vpx_dsp/fwd_txfm.c                     |  60
-rw-r--r--  vpx_dsp/fwd_txfm.h                     |   2
-rw-r--r--  vpx_dsp/mips/fwd_dct32x32_msa.c        |   6
-rw-r--r--  vpx_dsp/mips/fwd_txfm_msa.c            |  10
-rw-r--r--  vpx_dsp/vpx_dsp_rtcd_defs.pl           | 104
-rw-r--r--  vpx_dsp/x86/fwd_dct32x32_impl_sse2.h   |  16
-rw-r--r--  vpx_dsp/x86/fwd_txfm_avx2.c            |   4
-rw-r--r--  vpx_dsp/x86/fwd_txfm_impl_sse2.h       |  48
-rw-r--r--  vpx_dsp/x86/fwd_txfm_sse2.c            |  28
-rw-r--r--  vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm  |   3
11 files changed, 144 insertions(+), 141 deletions(-)
diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c
index 406b10d4c..79afc91b2 100644
--- a/vpx_dsp/arm/fwd_txfm_neon.c
+++ b/vpx_dsp/arm/fwd_txfm_neon.c
@@ -13,7 +13,7 @@
#include "./vpx_config.h"
#include "vpx_dsp/txfm_common.h"
-void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
+void vpx_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
int i;
// stage 1
int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2);
@@ -202,7 +202,7 @@ void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
}
}
-void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
+void vpx_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
int r;
int16x8_t sum = vld1q_s16(&input[0]);
for (r = 1; r < 8; ++r) {
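The *_1 functions renamed above (and in fwd_txfm.c below) are DC-only fast paths: they reduce the block to a single sum and zero the next coefficient. A rough, self-contained sketch of the idea follows; the real functions also apply a block-size-dependent scale to the sum, which the trimmed hunks do not show, and all names here are invented:

#include <stdint.h>

typedef int32_t tran_low_t;  /* stand-in; the library's width depends on CONFIG_VP9_HIGHBITDEPTH */

static void fdct_dc_only_sketch(const int16_t *input, tran_low_t *output,
                                int stride, int size) {
  int r, c;
  tran_low_t sum = 0;
  for (r = 0; r < size; ++r)
    for (c = 0; c < size; ++c)
      sum += input[r * stride + c];   /* accumulate the whole block */
  output[0] = sum;                    /* real code rescales per block size */
  output[1] = 0;
}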
diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c
index 558ca9a53..c82e1c1d5 100644
--- a/vpx_dsp/fwd_txfm.c
+++ b/vpx_dsp/fwd_txfm.c
@@ -10,7 +10,7 @@
#include "vpx_dsp/fwd_txfm.h"
-void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -77,7 +77,7 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
}
}
-void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 4; ++r)
@@ -88,7 +88,7 @@ void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
output[1] = 0;
}
-void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
+void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
int i, j;
tran_low_t intermediate[64];
int pass;
@@ -174,7 +174,7 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
}
}
-void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 8; ++r)
@@ -185,7 +185,7 @@ void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
output[1] = 0;
}
-void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
@@ -365,7 +365,7 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
}
}
-void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 16; ++r)
@@ -389,7 +389,7 @@ static INLINE tran_high_t half_round_shift(tran_high_t input) {
return rv;
}
-void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
+void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
tran_high_t step[32];
// Stage 1
step[0] = input[0] + input[(32 - 1)];
@@ -712,7 +712,7 @@ void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
-void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
+void vpx_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
int i, j;
tran_high_t output[32 * 32];
@@ -721,7 +721,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
- vp9_fdct32(temp_in, temp_out, 0);
+ vpx_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
@@ -731,7 +731,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
- vp9_fdct32(temp_in, temp_out, 0);
+ vpx_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
out[j + i * 32] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
@@ -741,7 +741,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
// Note that although we use dct_32_round in dct32 computation flow,
// this 2d fdct32x32 for rate-distortion optimization loop is operating
// within 16 bits precision.
-void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
+void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
int i, j;
tran_high_t output[32 * 32];
@@ -750,7 +750,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
- vp9_fdct32(temp_in, temp_out, 0);
+ vpx_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2;
@@ -763,13 +763,13 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
- vp9_fdct32(temp_in, temp_out, 1);
+ vpx_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i * 32] = (tran_low_t)temp_out[j];
}
}
-void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 32; ++r)
@@ -781,42 +781,42 @@ void vp9_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
+void vpx_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
int stride) {
- vp9_fdct4x4_c(input, output, stride);
+ vpx_fdct4x4_c(input, output, stride);
}
-void vp9_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+void vpx_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
int stride) {
- vp9_fdct8x8_c(input, final_output, stride);
+ vpx_fdct8x8_c(input, final_output, stride);
}
-void vp9_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
+void vpx_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
int stride) {
- vp9_fdct8x8_1_c(input, final_output, stride);
+ vpx_fdct8x8_1_c(input, final_output, stride);
}
-void vp9_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
+void vpx_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
int stride) {
- vp9_fdct16x16_c(input, output, stride);
+ vpx_fdct16x16_c(input, output, stride);
}
-void vp9_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
+void vpx_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
int stride) {
- vp9_fdct16x16_1_c(input, output, stride);
+ vpx_fdct16x16_1_c(input, output, stride);
}
-void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
- vp9_fdct32x32_c(input, out, stride);
+void vpx_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
+ vpx_fdct32x32_c(input, out, stride);
}
-void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
+void vpx_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
int stride) {
- vp9_fdct32x32_rd_c(input, out, stride);
+ vpx_fdct32x32_rd_c(input, out, stride);
}
-void vp9_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out,
+void vpx_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out,
int stride) {
- vp9_fdct32x32_1_c(input, out, stride);
+ vpx_fdct32x32_1_c(input, out, stride);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
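The "two passes" described in the comments of vpx_fdct4x4_c and vpx_fdct16x16_c are easiest to see in vpx_fdct32x32_c above: pass 1 runs the 1-D transform down each column into an intermediate buffer, and pass 2 runs it along each row of that buffer; the _rd variant appears to differ only in the rounding applied around the passes (the round argument to vpx_fdct32), which is what keeps it within 16-bit precision as its comment notes. A stripped-down sketch of the same control flow, with a placeholder 1-D transform and the pre-scaling and rounding shifts omitted (all names invented):

#include <stdint.h>

typedef int32_t tran_high_t;   /* stand-in for the library's wide intermediate type */

/* Placeholder 1-D transform standing in for vpx_fdct32 / the 1-D DCT. */
static void fdct1d_sketch(const tran_high_t *in, tran_high_t *out, int n) {
  int k;
  for (k = 0; k < n; ++k)
    out[k] = in[k];
}

/* Column pass, then row pass, mirroring vpx_fdct32x32_c (pre-scaling and
 * rounding shifts omitted).  n must be <= 32 for the fixed buffers here. */
static void fdct2d_sketch(const int16_t *input, tran_high_t *out,
                          int stride, int n) {
  tran_high_t tmp[32 * 32];
  tran_high_t col_in[32], col_out[32];
  int i, j;
  for (i = 0; i < n; ++i) {            /* pass 1: transform each column */
    for (j = 0; j < n; ++j)
      col_in[j] = input[j * stride + i];
    fdct1d_sketch(col_in, col_out, n);
    for (j = 0; j < n; ++j)
      tmp[j * n + i] = col_out[j];
  }
  for (i = 0; i < n; ++i)              /* pass 2: transform each row of tmp */
    fdct1d_sketch(&tmp[i * n], &out[i * n], n);
}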
diff --git a/vpx_dsp/fwd_txfm.h b/vpx_dsp/fwd_txfm.h
index 729a289d1..29e139c73 100644
--- a/vpx_dsp/fwd_txfm.h
+++ b/vpx_dsp/fwd_txfm.h
@@ -21,5 +21,5 @@ static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
return rv;
}
-void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round);
+void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round);
#endif // VPX_DSP_FWD_TXFM_H_
diff --git a/vpx_dsp/mips/fwd_dct32x32_msa.c b/vpx_dsp/mips/fwd_dct32x32_msa.c
index 0219571c7..2115a348c 100644
--- a/vpx_dsp/mips/fwd_dct32x32_msa.c
+++ b/vpx_dsp/mips/fwd_dct32x32_msa.c
@@ -675,7 +675,7 @@ static void fdct32x8_1d_row_4x(int16_t *tmp_buf_big, int16_t *tmp_buf,
fdct8x32_1d_row_transpose_store(tmp_buf, output);
}
-void vp9_fdct32x32_msa(const int16_t *input, int16_t *output,
+void vpx_fdct32x32_msa(const int16_t *input, int16_t *output,
int32_t src_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]);
@@ -913,7 +913,7 @@ static void fdct32x8_1d_row_rd(int16_t *tmp_buf_big, int16_t *tmp_buf,
fdct8x32_1d_row_transpose_store(tmp_buf, output);
}
-void vp9_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
+void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
int32_t src_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]);
@@ -932,7 +932,7 @@ void vp9_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
}
}
-void vp9_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
out[1] = 0;
out[0] = LD_HADD(input, stride);
diff --git a/vpx_dsp/mips/fwd_txfm_msa.c b/vpx_dsp/mips/fwd_txfm_msa.c
index 1e35542f7..f66dd5fce 100644
--- a/vpx_dsp/mips/fwd_txfm_msa.c
+++ b/vpx_dsp/mips/fwd_txfm_msa.c
@@ -166,7 +166,7 @@ void fdct16x8_1d_row(int16_t *input, int16_t *output) {
ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, output + 8, 16);
}
-void vp9_fdct4x4_msa(const int16_t *input, int16_t *output,
+void vpx_fdct4x4_msa(const int16_t *input, int16_t *output,
int32_t src_stride) {
v8i16 in0, in1, in2, in3;
@@ -196,7 +196,7 @@ void vp9_fdct4x4_msa(const int16_t *input, int16_t *output,
ST_SH2(in0, in2, output, 8);
}
-void vp9_fdct8x8_msa(const int16_t *input, int16_t *output,
+void vpx_fdct8x8_msa(const int16_t *input, int16_t *output,
int32_t src_stride) {
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
@@ -215,12 +215,12 @@ void vp9_fdct8x8_msa(const int16_t *input, int16_t *output,
ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
}
-void vp9_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+void vpx_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
out[0] = LD_HADD(input, stride);
out[1] = 0;
}
-void vp9_fdct16x16_msa(const int16_t *input, int16_t *output,
+void vpx_fdct16x16_msa(const int16_t *input, int16_t *output,
int32_t src_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, tmp_buf[16 * 16]);
@@ -236,7 +236,7 @@ void vp9_fdct16x16_msa(const int16_t *input, int16_t *output,
}
}
-void vp9_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
out[1] = 0;
out[0] = LD_HADD(input, stride);
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 4d6843bbd..cba80f4ae 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -129,83 +129,83 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4 sse2/;
+ add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct4x4 sse2/;
- add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4_1 sse2/;
+ add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct4x4_1 sse2/;
- add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2/;
+ add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct8x8 sse2/;
- add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8_1 sse2/;
+ add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct8x8_1 sse2/;
- add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16 sse2/;
+ add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct16x16 sse2/;
- add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16_1 sse2/;
+ add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct16x16_1 sse2/;
- add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32 sse2/;
+ add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32 sse2/;
- add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_rd sse2/;
+ add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32_rd sse2/;
- add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_1 sse2/;
+ add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32_1 sse2/;
- add_proto qw/void vp9_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct4x4 sse2/;
+ add_proto qw/void vpx_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct4x4 sse2/;
- add_proto qw/void vp9_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct8x8 sse2/;
+ add_proto qw/void vpx_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct8x8 sse2/;
- add_proto qw/void vp9_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct8x8_1/;
+ add_proto qw/void vpx_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct8x8_1/;
- add_proto qw/void vp9_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct16x16 sse2/;
+ add_proto qw/void vpx_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct16x16 sse2/;
- add_proto qw/void vp9_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct16x16_1/;
+ add_proto qw/void vpx_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct16x16_1/;
- add_proto qw/void vp9_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct32x32 sse2/;
+ add_proto qw/void vpx_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct32x32 sse2/;
- add_proto qw/void vp9_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct32x32_rd sse2/;
+ add_proto qw/void vpx_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct32x32_rd sse2/;
- add_proto qw/void vp9_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct32x32_1/;
+ add_proto qw/void vpx_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_highbd_fdct32x32_1/;
} else {
- add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4 sse2 msa/;
+ add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct4x4 sse2 msa/;
- add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4_1 sse2/;
+ add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct4x4_1 sse2/;
- add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
+ add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
- add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8_1 sse2 neon msa/;
+ add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct8x8_1 sse2 neon msa/;
- add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16 sse2 msa/;
+ add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct16x16 sse2 msa/;
- add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16_1 sse2 msa/;
+ add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct16x16_1 sse2 msa/;
- add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32 sse2 avx2 msa/;
+ add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32 sse2 avx2 msa/;
- add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_rd sse2 avx2 msa/;
+ add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32_rd sse2 avx2 msa/;
- add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_1 sse2 msa/;
+ add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vpx_fdct32x32_1 sse2 msa/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER
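In vpx_dsp_rtcd_defs.pl, add_proto declares the prototype for a dispatch name and specialize lists the architecture-specific implementations that may back it, so the rename has to land in both. Encoder code calls the un-suffixed dispatch name; a hedged sketch of such a call site, assuming vpx_dsp_rtcd.h is the header generated from this file and that tran_low_t comes from vpx_dsp/vpx_dsp_common.h:

#include "./vpx_dsp_rtcd.h"             /* assumed: generated dispatch header */
#include "vpx_dsp/vpx_dsp_common.h"     /* assumed home of tran_low_t */

static void encode_transform_8x8(const int16_t *src, tran_low_t *coeff,
                                 int stride) {
  /* Resolves at run time to vpx_fdct8x8_c, _sse2, _neon or _msa,
   * per the specialize lines above. */
  vpx_fdct8x8(src, coeff, stride);
}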
diff --git a/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h b/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h
index e0d272d74..b85ae103f 100644
--- a/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h
+++ b/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h
@@ -21,34 +21,34 @@
#define ADD_EPI16 _mm_adds_epi16
#define SUB_EPI16 _mm_subs_epi16
#if FDCT32x32_HIGH_PRECISION
-void vp9_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
+void vpx_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
int i, j;
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = intermediate[j * 32 + i];
- vp9_fdct32(temp_in, temp_out, 0);
+ vpx_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
out[j + i * 32] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
}
- #define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_c
- #define HIGH_FDCT32x32_2D_ROWS_C vp9_fdct32x32_rows_c
+ #define HIGH_FDCT32x32_2D_C vpx_highbd_fdct32x32_c
+ #define HIGH_FDCT32x32_2D_ROWS_C vpx_fdct32x32_rows_c
#else
-void vp9_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
+void vpx_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
int i, j;
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = intermediate[j * 32 + i];
- vp9_fdct32(temp_in, temp_out, 1);
+ vpx_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i * 32] = (tran_low_t)temp_out[j];
}
}
- #define HIGH_FDCT32x32_2D_C vp9_highbd_fdct32x32_rd_c
- #define HIGH_FDCT32x32_2D_ROWS_C vp9_fdct32x32_rd_rows_c
+ #define HIGH_FDCT32x32_2D_C vpx_highbd_fdct32x32_rd_c
+ #define HIGH_FDCT32x32_2D_ROWS_C vpx_fdct32x32_rd_rows_c
#endif // FDCT32x32_HIGH_PRECISION
#else
#define ADD_EPI16 _mm_add_epi16
diff --git a/vpx_dsp/x86/fwd_txfm_avx2.c b/vpx_dsp/x86/fwd_txfm_avx2.c
index c1d4f40ea..6d9da6aa8 100644
--- a/vpx_dsp/x86/fwd_txfm_avx2.c
+++ b/vpx_dsp/x86/fwd_txfm_avx2.c
@@ -10,13 +10,13 @@
#include "./vpx_config.h"
-#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
+#define FDCT32x32_2D_AVX2 vpx_fdct32x32_rd_avx2
#define FDCT32x32_HIGH_PRECISION 0
#include "vpx_dsp/x86/fwd_dct32x32_impl_avx2.h"
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
-#define FDCT32x32_2D_AVX2 vp9_fdct32x32_avx2
+#define FDCT32x32_2D_AVX2 vpx_fdct32x32_avx2
#define FDCT32x32_HIGH_PRECISION 1
#include "vpx_dsp/x86/fwd_dct32x32_impl_avx2.h" // NOLINT
#undef FDCT32x32_2D_AVX2
diff --git a/vpx_dsp/x86/fwd_txfm_impl_sse2.h b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
index 1f6b30256..69889e2e9 100644
--- a/vpx_dsp/x86/fwd_txfm_impl_sse2.h
+++ b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
@@ -106,7 +106,7 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
_mm_cmplt_epi16(in1, _mm_set1_epi16(0xfc00)));
test = _mm_movemask_epi8(_mm_or_si128(cmp0, cmp1));
if (test) {
- vp9_highbd_fdct4x4_c(input, output, stride);
+ vpx_highbd_fdct4x4_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -177,7 +177,7 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x2(&x0, &x1);
if (overflow) {
- vp9_highbd_fdct4x4_c(input, output, stride);
+ vpx_highbd_fdct4x4_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -199,7 +199,7 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x2(&t0, &t1);
if (overflow) {
- vp9_highbd_fdct4x4_c(input, output, stride);
+ vpx_highbd_fdct4x4_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -238,7 +238,7 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x2(&x0, &x1);
if (overflow) {
- vp9_highbd_fdct4x4_c(input, output, stride);
+ vpx_highbd_fdct4x4_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -322,7 +322,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3,
&q4, &q5, &q6, &q7);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
}
@@ -337,7 +337,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -380,7 +380,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&res0, &res4, &res2, &res6);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -410,7 +410,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x2(&r0, &r1);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -423,7 +423,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -466,7 +466,7 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&res1, &res7, &res5, &res3);
if (overflow) {
- vp9_highbd_fdct8x8_c(input, output, stride);
+ vpx_highbd_fdct8x8_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -729,7 +729,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x8(&input0, &input1, &input2, &input3,
&input4, &input5, &input6, &input7);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -750,7 +750,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
&step1_4, &step1_5,
&step1_6, &step1_7);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -770,7 +770,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3,
&q4, &q5, &q6, &q7);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -784,7 +784,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -806,7 +806,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&res00, &res08, &res04, &res12);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -827,7 +827,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x2(&r0, &r1);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -840,7 +840,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -863,7 +863,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x4(&res02, &res14,
&res10, &res06);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -891,7 +891,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5,
&step2_4);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -912,7 +912,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
&step3_4, &step3_5,
&step3_6, &step3_7);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -935,7 +935,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
overflow = check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6,
&step2_5);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -956,7 +956,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
&step1_4, &step1_5,
&step1_6, &step1_7);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -978,7 +978,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&res01, &res09, &res15, &res07);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
@@ -999,7 +999,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
#if DCT_HIGH_BIT_DEPTH
overflow = check_epi16_overflow_x4(&res05, &res13, &res11, &res03);
if (overflow) {
- vp9_highbd_fdct16x16_c(input, output, stride);
+ vpx_highbd_fdct16x16_c(input, output, stride);
return;
}
#endif // DCT_HIGH_BIT_DEPTH
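The reason so many call sites change in fwd_txfm_impl_sse2.h is the high-bit-depth fallback pattern: when DCT_HIGH_BIT_DEPTH is set, each stage checks whether its 16-bit intermediates overflowed and, if so, redoes the block with the renamed C function (vpx_highbd_fdct4x4_c, vpx_highbd_fdct8x8_c, vpx_highbd_fdct16x16_c). A self-contained sketch of that shape, with placeholder arithmetic and invented names:

#include <stdint.h>

/* Wide reference path, standing in for the vpx_highbd_fdct*_c fallbacks. */
static void txfm_wide_c(const int16_t *in, int32_t *out, int n) {
  int i;
  for (i = 0; i < n; ++i)
    out[i] = (int32_t)in[i] * 4;        /* placeholder arithmetic */
}

/* Fast path works in int16_t range; on overflow, redo the block wide. */
static void txfm_int16_fast(const int16_t *in, int32_t *out, int n) {
  int i;
  for (i = 0; i < n; ++i) {
    const int32_t v = (int32_t)in[i] * 4;
    if (v > INT16_MAX || v < INT16_MIN) {   /* overflow check */
      txfm_wide_c(in, out, n);              /* bail out to the C version */
      return;
    }
    out[i] = v;
  }
}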
diff --git a/vpx_dsp/x86/fwd_txfm_sse2.c b/vpx_dsp/x86/fwd_txfm_sse2.c
index 2704e6839..bca72e874 100644
--- a/vpx_dsp/x86/fwd_txfm_sse2.c
+++ b/vpx_dsp/x86/fwd_txfm_sse2.c
@@ -14,7 +14,7 @@
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
-void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
__m128i in0, in1;
__m128i tmp;
const __m128i zero = _mm_setzero_si128();
@@ -43,7 +43,7 @@ void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
store_output(&in0, output);
}
-void vp9_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
@@ -83,7 +83,7 @@ void vp9_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
store_output(&in1, output);
}
-void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
+void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
int stride) {
__m128i in0, in1, in2, in3;
__m128i u0, u1;
@@ -152,7 +152,7 @@ void vp9_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
store_output(&in1, output);
}
-void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
+void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
int stride) {
__m128i in0, in1, in2, in3;
__m128i u0, u1;
@@ -225,21 +225,21 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
}
#define DCT_HIGH_BIT_DEPTH 0
-#define FDCT4x4_2D vp9_fdct4x4_sse2
-#define FDCT8x8_2D vp9_fdct8x8_sse2
-#define FDCT16x16_2D vp9_fdct16x16_sse2
+#define FDCT4x4_2D vpx_fdct4x4_sse2
+#define FDCT8x8_2D vpx_fdct8x8_sse2
+#define FDCT16x16_2D vpx_fdct16x16_sse2
#include "vpx_dsp/x86/fwd_txfm_impl_sse2.h"
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
-#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
+#define FDCT32x32_2D vpx_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h"
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
-#define FDCT32x32_2D vp9_fdct32x32_sse2
+#define FDCT32x32_2D vpx_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
#include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
@@ -248,21 +248,21 @@ void vp9_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
#if CONFIG_VP9_HIGHBITDEPTH
#define DCT_HIGH_BIT_DEPTH 1
-#define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
-#define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
-#define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
+#define FDCT4x4_2D vpx_highbd_fdct4x4_sse2
+#define FDCT8x8_2D vpx_highbd_fdct8x8_sse2
+#define FDCT16x16_2D vpx_highbd_fdct16x16_sse2
#include "vpx_dsp/x86/fwd_txfm_impl_sse2.h" // NOLINT
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
-#define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2
+#define FDCT32x32_2D vpx_highbd_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
-#define FDCT32x32_2D vp9_highbd_fdct32x32_sse2
+#define FDCT32x32_2D vpx_highbd_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
#include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
diff --git a/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm b/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
index 5f6354648..0fa8ea1d5 100644
--- a/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ b/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
@@ -7,6 +7,9 @@
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
+
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
; This file provides SSSE3 version of the forward transformation. Part