-rw-r--r--  test/blockiness_test.cc                         4
-rw-r--r--  test/consistency_test.cc                        4
-rw-r--r--  test/convolve_test.cc                           4
-rw-r--r--  test/cq_test.cc                                 4
-rw-r--r--  vp8/common/mips/mmi/idctllm_mmi.c              43
-rw-r--r--  vp8/common/mips/mmi/loopfilter_filters_mmi.c  282
-rw-r--r--  vp8/common/mips/mmi/sixtap_filter_mmi.c        45
-rw-r--r--  vp8/encoder/mips/mmi/dct_mmi.c                 53
-rw-r--r--  vp8/encoder/mips/mmi/vp8_quantize_mmi.c        27
-rw-r--r--  vpx_dsp/mips/sad_mmi.c                         35
-rw-r--r--  vpx_dsp/mips/variance_mmi.c                   146
11 files changed, 422 insertions(+), 225 deletions(-)
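
The theme running through all eleven files: 64-bit constants that previously lived in memory (DECLARE_ALIGNED) and were bound to the inline asm through "f" input constraints are now materialized in registers with dli/dmtc1, and the existing li/mtc1 pairs are widened to their 64-bit forms. A minimal sketch of the before/after pattern, using ff_ph_04 from idctllm_mmi.c below (operand lists abridged):

    /* before: constant in memory, passed as an input operand */
    DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL };
    __asm__ volatile (
        /* ... uses %[ff_ph_04] ... */
        :
        : [ff_ph_04]"f"(ff_ph_04)
    );

    /* after: dli loads the full 64-bit immediate into a GPR (li would
     * truncate it), dmtc1 moves all 64 bits into an FPR (mtc1 moves only
     * the low 32), and the FPR becomes a compiler-allocated output. */
    double ff_ph_04;
    uint64_t tmp[1];
    __asm__ volatile (
        "dli   %[tmp0], 0x0004000400040004   \n\t"
        "dmtc1 %[tmp0], %[ff_ph_04]          \n\t"
        /* ... uses %[ff_ph_04] ... */
        : [ff_ph_04]"=&f"(ff_ph_04), [tmp0]"=&r"(tmp[0])
        :
    );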
diff --git a/test/blockiness_test.cc b/test/blockiness_test.cc
index 75aa2938e..1ad444a04 100644
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -35,14 +35,14 @@ class BlockinessTestBase : public ::testing::Test {
public:
BlockinessTestBase(int width, int height) : width_(width), height_(height) {}
- static void SetUpTestCase() {
+ static void SetUpTestSuite() {
source_data_ = reinterpret_cast<uint8_t *>(
vpx_memalign(kDataAlignment, kDataBufferSize));
reference_data_ = reinterpret_cast<uint8_t *>(
vpx_memalign(kDataAlignment, kDataBufferSize));
}
- static void TearDownTestCase() {
+ static void TearDownTestSuite() {
vpx_free(source_data_);
source_data_ = NULL;
vpx_free(reference_data_);
diff --git a/test/consistency_test.cc b/test/consistency_test.cc
index 69ebaf70c..f31fd8c92 100644
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -39,7 +39,7 @@ class ConsistencyTestBase : public ::testing::Test {
public:
ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
- static void SetUpTestCase() {
+ static void SetUpTestSuite() {
source_data_[0] = reinterpret_cast<uint8_t *>(
vpx_memalign(kDataAlignment, kDataBufferSize));
reference_data_[0] = reinterpret_cast<uint8_t *>(
@@ -52,7 +52,7 @@ class ConsistencyTestBase : public ::testing::Test {
}
static void ClearSsim() { memset(ssim_array_, 0, kDataBufferSize / 16); }
- static void TearDownTestCase() {
+ static void TearDownTestSuite() {
vpx_free(source_data_[0]);
source_data_[0] = NULL;
vpx_free(reference_data_[0]);
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 7330e97db..6eef26f93 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -341,7 +341,7 @@ void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
public:
- static void SetUpTestCase() {
+ static void SetUpTestSuite() {
// Force input_ to be unaligned, output to be 16 byte aligned.
input_ = reinterpret_cast<uint8_t *>(
vpx_memalign(kDataAlignment, kInputBufferSize + 1)) +
@@ -363,7 +363,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
virtual void TearDown() { libvpx_test::ClearSystemState(); }
- static void TearDownTestCase() {
+ static void TearDownTestSuite() {
vpx_free(input_ - 1);
input_ = NULL;
vpx_free(output_);
diff --git a/test/cq_test.cc b/test/cq_test.cc
index 3126f3b4e..292adb0d0 100644
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -29,9 +29,9 @@ class CQTest : public ::libvpx_test::EncoderTest,
// maps the cqlevel to the bitrate produced.
typedef std::map<int, uint32_t> BitrateMap;
- static void SetUpTestCase() { bitrates_.clear(); }
+ static void SetUpTestSuite() { bitrates_.clear(); }
- static void TearDownTestCase() {
+ static void TearDownTestSuite() {
ASSERT_TRUE(!HasFailure())
<< "skipping bitrate validation due to earlier failure.";
uint32_t prev_actual_bitrate = kCQTargetBitrate;
diff --git a/vp8/common/mips/mmi/idctllm_mmi.c b/vp8/common/mips/mmi/idctllm_mmi.c
index 4fad1d347..a35689dd3 100644
--- a/vp8/common/mips/mmi/idctllm_mmi.c
+++ b/vp8/common/mips/mmi/idctllm_mmi.c
@@ -41,14 +41,18 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride) {
double ftmp[12];
- uint32_t tmp[0];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL };
+ uint64_t tmp[1];
+ double ff_ph_04, ff_ph_4e7b, ff_ph_22a3;
__asm__ volatile (
+ "dli %[tmp0], 0x0004000400040004 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_04] \n\t"
+ "dli %[tmp0], 0x4e7b4e7b4e7b4e7b \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_4e7b] \n\t"
+ "dli %[tmp0], 0x22a322a322a322a3 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_22a3] \n\t"
MMI_LI(%[tmp0], 0x02)
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
@@ -186,9 +190,10 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
[ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
- [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr)
- : [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3),
- [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04),
+ [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr),
+ [ff_ph_4e7b]"=&f"(ff_ph_4e7b), [ff_ph_04]"=&f"(ff_ph_04),
+ [ff_ph_22a3]"=&f"(ff_ph_22a3)
+ : [ip]"r"(input),
[pred_stride]"r"((mips_reg)pred_stride),
[dst_stride]"r"((mips_reg)dst_stride)
: "memory"
@@ -198,12 +203,13 @@ void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride) {
- int a1 = ((input_dc + 4) >> 3);
- double ftmp[5];
+ int a0 = ((input_dc + 4) >> 3);
+ double a1, ftmp[5];
int low32;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "dmtc1 %[a0], %[a1] \n\t"
"pshufh %[a1], %[a1], %[ftmp0] \n\t"
"ulw %[low32], 0x00(%[pred_ptr]) \n\t"
"mtc1 %[low32], %[ftmp1] \n\t"
@@ -244,9 +250,9 @@ void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
"gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32),
- [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr)
+ [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr), [a1]"=&f"(a1)
: [dst_stride]"r"((mips_reg)dst_stride),
- [pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1)
+ [pred_stride]"r"((mips_reg)pred_stride), [a0]"r"(a0)
: "memory"
);
}
@@ -254,14 +260,15 @@ void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
int i;
int16_t output[16];
- double ftmp[12];
- uint32_t tmp[1];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL };
+ double ff_ph_03, ftmp[12];
+ uint64_t tmp[1];
__asm__ volatile (
+ "dli %[tmp0], 0x0003000300030003 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_03] \n\t"
MMI_LI(%[tmp0], 0x03)
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
"gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
@@ -317,8 +324,8 @@ void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
[ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
- [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0])
- : [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03)
+ [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [ff_ph_03]"=&f"(ff_ph_03)
+ : [ip]"r"(input), [op]"r"(output)
: "memory"
);
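
Besides the constant loads, this file fixes a latent buffer bug: the scratch array for the immediates was declared with zero elements, so every tmp[0] access was out of bounds, and it is now also widened to hold a full dli result:

    uint32_t tmp[0];   /* before: zero-length array, tmp[0] is out of bounds */
    uint64_t tmp[1];   /* after: one element, wide enough for a 64-bit dli   */

The same idea covers the scalar in vp8_dc_only_idct_add_mmi: a1, formerly an int bound with "f", is now computed in a0 ("r") and moved across with dmtc1 before pshufh broadcasts it.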
diff --git a/vp8/common/mips/mmi/loopfilter_filters_mmi.c b/vp8/common/mips/mmi/loopfilter_filters_mmi.c
index fc1240cc2..a07a7e3b4 100644
--- a/vp8/common/mips/mmi/loopfilter_filters_mmi.c
+++ b/vp8/common/mips/mmi/loopfilter_filters_mmi.c
@@ -13,28 +13,25 @@
#include "vp8/common/onyxc_int.h"
#include "vpx_ports/asmdefs_mmi.h"
-DECLARE_ALIGNED(8, static const uint64_t, ff_ph_01) = { 0x0001000100010001ULL };
-DECLARE_ALIGNED(8, static const uint64_t,
- ff_ph_003f) = { 0x003f003f003f003fULL };
-DECLARE_ALIGNED(8, static const uint64_t,
- ff_ph_0900) = { 0x0900090009000900ULL };
-DECLARE_ALIGNED(8, static const uint64_t,
- ff_ph_1200) = { 0x1200120012001200ULL };
-DECLARE_ALIGNED(8, static const uint64_t,
- ff_ph_1b00) = { 0x1b001b001b001b00ULL };
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_fe) = { 0xfefefefefefefefeULL };
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_80) = { 0x8080808080808080ULL };
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_04) = { 0x0404040404040404ULL };
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_03) = { 0x0303030303030303ULL };
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_01) = { 0x0101010101010101ULL };
-
void vp8_loop_filter_horizontal_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
- uint32_t tmp[1];
+ uint64_t tmp[1];
mips_reg addr[2];
double ftmp[12];
+ double ff_ph_01, ff_pb_fe, ff_pb_80, ff_pb_04, ff_pb_03;
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_01] \n\t"
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x0303030303030303 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_03] \n\t"
"1: \n\t"
"gsldlc1 %[ftmp10], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp10], 0x00(%[limit]) \n\t"
@@ -91,9 +88,9 @@ void vp8_loop_filter_horizontal_edge_mmi(
"pasubub %[ftmp1], %[ftmp5], %[ftmp6] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
"pasubub %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
- "and %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp10] \n\t"
+ "pand %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"gsldlc1 %[ftmp10], 0x07(%[blimit]) \n\t"
@@ -134,8 +131,8 @@ void vp8_loop_filter_horizontal_edge_mmi(
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp8] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
"psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
"psrah %[ftmp11], %[ftmp11], %[ftmp10] \n\t"
"packsshb %[ftmp8], %[ftmp0], %[ftmp11] \n\t"
@@ -149,8 +146,8 @@ void vp8_loop_filter_horizontal_edge_mmi(
"packsshb %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
"paddsh %[ftmp9], %[ftmp9], %[ff_ph_01] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
"psrah %[ftmp11], %[ftmp11], %[ftmp10] \n\t"
"psrah %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"packsshb %[ftmp11], %[ftmp11], %[ftmp9] \n\t"
@@ -188,17 +185,18 @@ void vp8_loop_filter_horizontal_edge_mmi(
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
- [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
+ [src_ptr]"+&r"(src_ptr), [count]"+&r"(count),
+ [ff_ph_01]"=&f"(ff_ph_01), [ff_pb_fe]"=&f"(ff_pb_fe),
+ [ff_pb_80]"=&f"(ff_pb_80), [ff_pb_04]"=&f"(ff_pb_04),
+ [ff_pb_03]"=&f"(ff_pb_03)
: [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step),
[src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)),
- [src_pixel_step_x4]"r"((mips_reg)(src_pixel_step<<2)),
- [ff_ph_01]"f"(ff_ph_01), [ff_pb_fe]"f"(ff_pb_fe),
- [ff_pb_80]"f"(ff_pb_80), [ff_pb_04]"f"(ff_pb_04),
- [ff_pb_03]"f"(ff_pb_03)
+ [src_pixel_step_x4]"r"((mips_reg)(src_pixel_step<<2))
: "memory"
);
+ /* clang-format on */
}
void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
@@ -206,11 +204,23 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
- uint32_t tmp[1];
+ uint64_t tmp[1];
mips_reg addr[2];
double ftmp[13];
+ double ff_pb_fe, ff_ph_01, ff_pb_03, ff_pb_04, ff_pb_80;
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_01] \n\t"
+ "dli %[tmp0], 0x0303030303030303 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_03] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
@@ -315,8 +325,8 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
/* abs (p1-q1) */
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
"pand %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp1] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp1] \n\t"
"psrlh %[ftmp12], %[ftmp12], %[ftmp1] \n\t"
"paddusb %[ftmp1], %[ftmp11], %[ftmp12] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
@@ -354,8 +364,8 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
"paddsb %[ftmp11], %[ftmp2], %[ff_pb_04] \n\t"
"paddsb %[ftmp12], %[ftmp2], %[ff_pb_03] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp7] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp7] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
@@ -379,8 +389,8 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
"paddsh %[ftmp0], %[ftmp0], %[ff_ph_01] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ff_ph_01] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp7] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp7] \n\t"
"psrah %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
"packsshb %[ftmp2], %[ftmp0], %[ftmp8] \n\t"
@@ -450,15 +460,16 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
- [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
+ [src_ptr]"+&r"(src_ptr), [count]"+&r"(count),
+ [ff_ph_01]"=&f"(ff_ph_01), [ff_pb_03]"=&f"(ff_pb_03),
+ [ff_pb_04]"=&f"(ff_pb_04), [ff_pb_80]"=&f"(ff_pb_80),
+ [ff_pb_fe]"=&f"(ff_pb_fe)
: [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh),
- [src_pixel_step]"r"((mips_reg)src_pixel_step),
- [ff_ph_01]"f"(ff_ph_01), [ff_pb_03]"f"(ff_pb_03),
- [ff_pb_04]"f"(ff_pb_04), [ff_pb_80]"f"(ff_pb_80),
- [ff_pb_fe]"f"(ff_pb_fe)
+ [src_pixel_step]"r"((mips_reg)src_pixel_step)
: "memory"
);
+ /* clang-format on */
}
/* clang-format off */
@@ -484,10 +495,29 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
void vp8_mbloop_filter_horizontal_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
- uint32_t tmp[1];
+ uint64_t tmp[1];
double ftmp[13];
+ double ff_pb_fe, ff_pb_80, ff_pb_04, ff_pb_03, ff_ph_003f, ff_ph_0900,
+ ff_ph_1200, ff_ph_1b00;
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x0303030303030303 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_03] \n\t"
+ "dli %[tmp0], 0x003f003f003f003f \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_003f] \n\t"
+ "dli %[tmp0], 0x0900090009000900 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_0900] \n\t"
+ "dli %[tmp0], 0x1200120012001200 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_1200] \n\t"
+ "dli %[tmp0], 0x1b001b001b001b00 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_1b00] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"1: \n\t"
@@ -550,8 +580,8 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
"pasubub %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
"pand %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t"
@@ -584,8 +614,8 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
"pand %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"paddsb %[ftmp0], %[ftmp2], %[ff_pb_03] \n\t"
VP8_MBLOOP_HPSRAB
"paddsb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
@@ -593,8 +623,8 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
VP8_MBLOOP_HPSRAB
"psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "li %[tmp0], 0x07 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x07 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
@@ -649,18 +679,20 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
- [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
+ [src_ptr]"+&r"(src_ptr), [count]"+&r"(count),
+ [ff_pb_fe]"=&f"(ff_pb_fe), [ff_pb_80]"=&f"(ff_pb_80),
+ [ff_pb_04]"=&f"(ff_pb_04), [ff_pb_03]"=&f"(ff_pb_03),
+ [ff_ph_0900]"=&f"(ff_ph_0900), [ff_ph_1b00]"=&f"(ff_ph_1b00),
+ [ff_ph_1200]"=&f"(ff_ph_1200), [ff_ph_003f]"=&f"(ff_ph_003f)
: [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh),
- [src_pixel_step]"r"((mips_reg)src_pixel_step),
- [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
- [ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
- [ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
- [ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
+ [src_pixel_step]"r"((mips_reg)src_pixel_step)
: "memory"
);
+ /* clang-format on */
}
+/* clang-format off */
#define VP8_MBLOOP_VPSRAB_ADDH \
"pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
"pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
@@ -673,15 +705,30 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" \
"packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t"
+/* clang-format on */
void vp8_mbloop_filter_vertical_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
mips_reg tmp[1];
- DECLARE_ALIGNED(8, const uint64_t, srct[1]);
+ DECLARE_ALIGNED(8, const uint64_t, srct[2]);
double ftmp[14];
+ double ff_ph_003f, ff_ph_0900, ff_pb_fe, ff_pb_80, ff_pb_04, ff_pb_03;
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x003f003f003f003f \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_003f] \n\t"
+ "dli %[tmp0], 0x0900090009000900 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_0900] \n\t"
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x0303030303030303 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_03] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
"1: \n\t"
@@ -783,8 +830,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
/* abs (p1-q1) / 2 */
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
"pand %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp8] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp8] \n\t"
"psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
@@ -824,8 +871,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t"
"paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp12] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
@@ -842,8 +889,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
/* ftmp6: ps0 */
"paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "li %[tmp0], 0x07 \n\t"
- "mtc1 %[tmp0], %[ftmp12] \n\t"
+ "dli %[tmp0], 0x07 \n\t"
+ "dmtc1 %[tmp0], %[ftmp12] \n\t"
VP8_MBLOOP_VPSRAB_ADDH
"paddh %[ftmp1], %[ff_ph_0900], %[ff_ph_0900] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_ph_0900] \n\t"
@@ -948,17 +995,19 @@ void vp8_mbloop_filter_vertical_edge_mmi(
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
- [count]"+&r"(count)
+ [count]"+&r"(count),
+ [ff_ph_003f]"=&f"(ff_ph_003f), [ff_ph_0900]"=&f"(ff_ph_0900),
+ [ff_pb_03]"=&f"(ff_pb_03), [ff_pb_04]"=&f"(ff_pb_04),
+ [ff_pb_80]"=&f"(ff_pb_80), [ff_pb_fe]"=&f"(ff_pb_fe)
: [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh),
- [src_pixel_step]"r"((mips_reg)src_pixel_step),
- [ff_ph_003f]"f"(ff_ph_003f), [ff_ph_0900]"f"(ff_ph_0900),
- [ff_pb_03]"f"(ff_pb_03), [ff_pb_04]"f"(ff_pb_04),
- [ff_pb_80]"f"(ff_pb_80), [ff_pb_fe]"f"(ff_pb_fe)
+ [src_pixel_step]"r"((mips_reg)src_pixel_step)
: "memory"
);
+ /* clang-format on */
}
+/* clang-format off */
#define VP8_SIMPLE_HPSRAB \
"psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t" \
"psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" \
@@ -966,23 +1015,38 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"psrah %[ftmp1], %[ftmp5], %[ftmp10] \n\t" \
"psllh %[ftmp1], %[ftmp1], %[ftmp8] \n\t" \
"por %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+/* clang-format on */
void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
int src_pixel_step,
const unsigned char *blimit) {
- uint32_t tmp[1], count = 2;
+ uint64_t tmp[1], count = 2;
mips_reg addr[2];
double ftmp[12];
+ double ff_pb_fe, ff_pb_80, ff_pb_04, ff_pb_01;
+ /* clang-format off */
__asm__ volatile (
- "li %[tmp0], 0x08 \n\t"
- "mtc1 %[tmp0], %[ftmp8] \n\t"
- "li %[tmp0], 0x03 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp10] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
+ "dli %[tmp0], 0x08 \n\t"
+ "dmtc1 %[tmp0], %[ftmp8] \n\t"
+ "dli %[tmp0], 0x03 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x0101010101010101 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_01] \n\t"
"1: \n\t"
"gsldlc1 %[ftmp3], 0x07(%[blimit]) \n\t"
@@ -996,7 +1060,7 @@ void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"pasubub %[ftmp1], %[ftmp7], %[ftmp2] \n\t"
- "and %[ftmp1], %[ftmp1], %[ff_pb_fe] \n\t"
+ "pand %[ftmp1], %[ftmp1], %[ff_pb_fe] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
@@ -1020,7 +1084,7 @@ void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
"paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
- "and %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
+ "pand %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ff_pb_04] \n\t"
VP8_SIMPLE_HPSRAB
@@ -1048,30 +1112,43 @@ void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr,
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
- [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
+ [src_ptr]"+&r"(src_ptr), [count]"+&r"(count),
+ [ff_pb_fe]"=&f"(ff_pb_fe), [ff_pb_80]"=&f"(ff_pb_80),
+ [ff_pb_04]"=&f"(ff_pb_04), [ff_pb_01]"=&f"(ff_pb_01)
: [blimit]"r"(blimit),
[src_pixel_step]"r"((mips_reg)src_pixel_step),
- [src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)),
- [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
- [ff_pb_04]"f"(ff_pb_04), [ff_pb_01]"f"(ff_pb_01)
+ [src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1))
: "memory"
);
+ /* clang-format on */
}
void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
int src_pixel_step,
const unsigned char *blimit) {
- uint32_t tmp[1], count = 2;
+ uint64_t tmp[1], count = 2;
mips_reg addr[2];
- DECLARE_ALIGNED(8, const uint64_t, srct[1]);
- double ftmp[12];
+ DECLARE_ALIGNED(8, const uint64_t, srct[2]);
+ double ftmp[12], ff_pb_fe, ff_pb_80, ff_pb_04, ff_pb_01;
+ /* clang-format off */
__asm__ volatile (
- "li %[tmp0], 0x08 \n\t"
- "mtc1 %[tmp0], %[ftmp8] \n\t"
- "li %[tmp0], 0x20 \n\t"
- "mtc1 %[tmp0], %[ftmp10] \n\t"
-
+ "dli %[tmp0], 0x08 \n\t"
+ "dmtc1 %[tmp0], %[ftmp8] \n\t"
+ "dli %[tmp0], 0x20 \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0x08 \n\t"
+ "dmtc1 %[tmp0], %[ftmp8] \n\t"
+ "dli %[tmp0], 0x20 \n\t"
+ "dmtc1 %[tmp0], %[ftmp10] \n\t"
+ "dli %[tmp0], 0xfefefefefefefefe \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_fe] \n\t"
+ "dli %[tmp0], 0x8080808080808080 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_80] \n\t"
+ "dli %[tmp0], 0x0404040404040404 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_04] \n\t"
+ "dli %[tmp0], 0x0101010101010101 \n\t"
+ "dmtc1 %[tmp0], %[ff_pb_01] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step_x4])
MMI_SUBU(%[src_ptr], %[src_ptr], 0x02)
@@ -1118,8 +1195,8 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
"punpckhwd %[ftmp3], %[ftmp2], %[ftmp5] \n\t"
"punpcklwd %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
- "li %[tmp0], 0x01 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x01 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pasubub %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
"pand %[ftmp6], %[ftmp6], %[ff_pb_fe] \n\t"
"psrlh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
@@ -1149,14 +1226,14 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
"pand %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ff_pb_04] \n\t"
- "li %[tmp0], 0x03 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x03 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t"
"psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
"psrlh %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psrah %[ftmp7], %[ftmp5], %[ftmp9] \n\t"
"psllh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"por %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
@@ -1164,14 +1241,14 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
"pxor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
"psubsb %[ftmp5], %[ftmp5], %[ff_pb_01] \n\t"
- "li %[tmp0], 0x03 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x03 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t"
"psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
"psrlh %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
- "li %[tmp0], 0x0b \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x0b \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
"psllh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
"por %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
@@ -1235,16 +1312,17 @@ void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr,
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
- [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
+ [src_ptr]"+&r"(src_ptr), [count]"+&r"(count),
+ [ff_pb_fe]"=&f"(ff_pb_fe), [ff_pb_80]"=&f"(ff_pb_80),
+ [ff_pb_04]"=&f"(ff_pb_04), [ff_pb_01]"=&f"(ff_pb_01)
: [blimit]"r"(blimit), [srct]"r"(srct),
[src_pixel_step]"r"((mips_reg)src_pixel_step),
[src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)),
[src_pixel_step_x4]"r"((mips_reg)(src_pixel_step<<2)),
- [src_pixel_step_x8]"r"((mips_reg)(src_pixel_step<<3)),
- [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
- [ff_pb_04]"f"(ff_pb_04), [ff_pb_01]"f"(ff_pb_01)
+ [src_pixel_step_x8]"r"((mips_reg)(src_pixel_step<<3))
: "memory"
);
+ /* clang-format on */
}
/* Horizontal MB filtering */
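
Two more fixes ride along in this file. The masking ops change from "and" to "pand": and is the MIPS GPR instruction, while pand is the MMI opcode that operates on the 64-bit FPR vectors these kernels work in. And srct grows from one uint64_t to two, presumably because the vertical filters spill two doublewords through %[srct] (inferred from the [1] to [2] change; the stores are outside the hunks shown):

    "pasubub %[ftmp2], %[ftmp4], %[ftmp7]    \n\t"
    "pand    %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t"  /* was "and": GPR-only opcode */

    DECLARE_ALIGNED(8, const uint64_t, srct[2]);    /* room for two spilled doublewords */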
diff --git a/vp8/common/mips/mmi/sixtap_filter_mmi.c b/vp8/common/mips/mmi/sixtap_filter_mmi.c
index dbe35d09f..b85f73fdf 100644
--- a/vp8/common/mips/mmi/sixtap_filter_mmi.c
+++ b/vp8/common/mips/mmi/sixtap_filter_mmi.c
@@ -70,9 +70,8 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
unsigned int output_height,
unsigned int output_width,
const int16_t *vp8_filter) {
- uint32_t tmp[1];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
-
+ uint64_t tmp[1];
+ double ff_ph_40;
#if _MIPS_SIM == _ABIO32
register double fzero asm("$f0");
register double ftmp0 asm("$f2");
@@ -103,7 +102,10 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp11 asm("$f12");
#endif // _MIPS_SIM == _ABIO32
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x0040004000400040 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_40] \n\t"
"ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t"
"ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t"
"ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t"
@@ -111,10 +113,10 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
"pxor %[fzero], %[fzero], %[fzero] \n\t"
- "li %[tmp0], 0x07 \n\t"
- "mtc1 %[tmp0], %[ftmp7] \n\t"
- "li %[tmp0], 0x08 \n\t"
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dli %[tmp0], 0x07 \n\t"
+ "dmtc1 %[tmp0], %[ftmp7] \n\t"
+ "dli %[tmp0], 0x08 \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"1: \n\t"
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
@@ -166,21 +168,22 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
- [src_ptr]"+&r"(src_ptr)
+ [src_ptr]"+&r"(src_ptr), [ff_ph_40]"=&f"(ff_ph_40)
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
- [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
- [ff_ph_40]"f"(ff_ph_40)
+ [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width)
: "memory"
);
+ /* clang-format on */
}
/* Horizontal filter: pixel_step is always W */
static INLINE void vp8_filter_block1dc_v6_mmi(
uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height,
int output_pitch, unsigned int pixels_per_line, const int16_t *vp8_filter) {
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
- uint32_t tmp[1];
+ double ff_ph_40;
+ uint64_t tmp[1];
mips_reg addr[1];
+
#if _MIPS_SIM == _ABIO32
register double fzero asm("$f0");
register double ftmp0 asm("$f2");
@@ -215,7 +218,10 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp13 asm("$f14");
#endif // _MIPS_SIM == _ABIO32
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x0040004000400040 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_40] \n\t"
"ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t"
"ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t"
"ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t"
@@ -223,8 +229,8 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
"pxor %[fzero], %[fzero], %[fzero] \n\t"
- "li %[tmp0], 0x07 \n\t"
- "mtc1 %[tmp0], %[ftmp13] \n\t"
+ "dli %[tmp0], 0x07 \n\t"
+ "dmtc1 %[tmp0], %[ftmp13] \n\t"
/* In order to make full use of memory load delay slot,
* Operation of memory loading and calculating has been rearranged.
@@ -285,15 +291,16 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
- [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
+ [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
+ [ff_ph_40]"=&f"(ff_ph_40)
: [pixels_per_line]"r"((mips_reg)pixels_per_line),
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
[vp8_filter]"r"(vp8_filter),
- [output_pitch]"r"((mips_reg)output_pitch),
- [ff_ph_40]"f"(ff_ph_40)
+ [output_pitch]"r"((mips_reg)output_pitch)
: "memory"
);
+ /* clang-format on */
}
/* When xoffset == 0, vp8_filter= {0,0,128,0,0,0},
@@ -313,6 +320,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
register double ftmp1 asm("$f2");
#endif // _MIPS_SIM == _ABIO32
+ /* clang-format off */
__asm__ volatile (
"pxor %[fzero], %[fzero], %[fzero] \n\t"
@@ -335,6 +343,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
[output_width]"r"(output_width)
: "memory"
);
+ /* clang-format on */
}
static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
@@ -350,6 +359,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
register double ftmp1 asm("$f2");
#endif // _MIPS_SIM == _ABIO32
+ /* clang-format off */
__asm__ volatile (
"pxor %[fzero], %[fzero], %[fzero] \n\t"
@@ -371,6 +381,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
[output_pitch]"r"((mips_reg)output_pitch)
: "memory"
);
+ /* clang-format on */
}
#define sixtapNxM(n, m) \
diff --git a/vp8/encoder/mips/mmi/dct_mmi.c b/vp8/encoder/mips/mmi/dct_mmi.c
index b5ecf0f1c..0fd25fcda 100644
--- a/vp8/encoder/mips/mmi/dct_mmi.c
+++ b/vp8/encoder/mips/mmi/dct_mmi.c
@@ -46,6 +46,7 @@
void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
uint64_t tmp[1];
int16_t *ip = input;
+ double ff_ph_op1, ff_ph_op3;
#if _MIPS_SIM == _ABIO32
register double ftmp0 asm("$f0");
@@ -83,13 +84,16 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
DECLARE_ALIGNED(8, const uint64_t, ff_pw_51000) = { 0x0000c7380000c738ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_14500) = { 0x000038a4000038a4ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_7500) = { 0x00001d4c00001d4cULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_op1) = { 0x14e808a914e808a9ULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_op3) = { 0xeb1808a9eb1808a9ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5352) = { 0x000014e8000014e8ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_2217) = { 0x000008a9000008a9ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL };
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x14e808a914e808a9 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_op1] \n\t"
+ "dli %[tmp0], 0xeb1808a9eb1808a9 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_op3] \n\t"
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
@@ -129,7 +133,7 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
// op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12
MMI_LI(%[tmp0], 0x0c)
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"ldc1 %[ftmp12], %[ff_pw_14500] \n\t"
"punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op1] \n\t"
@@ -169,7 +173,7 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
MMI_LI(%[tmp0], 0x04)
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
@@ -211,15 +215,16 @@ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) {
[ftmp3] "=&f"(ftmp3), [ftmp4] "=&f"(ftmp4), [ftmp5] "=&f"(ftmp5),
[ftmp6] "=&f"(ftmp6), [ftmp7] "=&f"(ftmp7), [ftmp8] "=&f"(ftmp8),
[ftmp9] "=&f"(ftmp9), [ftmp10] "=&f"(ftmp10), [ftmp11] "=&f"(ftmp11),
- [ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip)
+ [ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip),
+ [ff_ph_op1] "=&f"(ff_ph_op1), [ff_ph_op3] "=&f"(ff_ph_op3)
: [ff_ph_01] "m"(ff_ph_01), [ff_ph_07] "m"(ff_ph_07),
- [ff_ph_op1] "f"(ff_ph_op1), [ff_ph_op3] "f"(ff_ph_op3),
[ff_pw_14500] "m"(ff_pw_14500), [ff_pw_7500] "m"(ff_pw_7500),
[ff_pw_12000] "m"(ff_pw_12000), [ff_pw_51000] "m"(ff_pw_51000),
[ff_pw_5352]"m"(ff_pw_5352), [ff_pw_2217]"m"(ff_pw_2217),
[ff_ph_8]"m"(ff_ph_8), [pitch]"r"(pitch), [output] "r"(output)
: "memory"
);
+ /* clang-format on */
}
void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) {
@@ -228,17 +233,22 @@ void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) {
}
void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
- double ftmp[13];
- uint32_t tmp[1];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_01) = { 0x0000000100000001ULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_03) = { 0x0000000300000003ULL };
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_mask) = { 0x0001000000010000ULL };
+ double ftmp[13], ff_ph_01, ff_pw_01, ff_pw_03, ff_pw_mask;
+ uint64_t tmp[1];
+ /* clang-format off */
__asm__ volatile (
+ "dli %[tmp0], 0x0001000100010001 \n\t"
+ "dmtc1 %[tmp0], %[ff_ph_01] \n\t"
+ "dli %[tmp0], 0x0000000100000001 \n\t"
+ "dmtc1 %[tmp0], %[ff_pw_01] \n\t"
+ "dli %[tmp0], 0x0000000300000003 \n\t"
+ "dmtc1 %[tmp0], %[ff_pw_03] \n\t"
+ "dli %[tmp0], 0x0001000000010000 \n\t"
+ "dmtc1 %[tmp0], %[ff_pw_mask] \n\t"
MMI_LI(%[tmp0], 0x02)
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
@@ -337,7 +347,7 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"psubw %[ftmp4], %[ftmp9], %[ftmp10] \n\t"
MMI_LI(%[tmp0], 0x03)
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"pcmpgtw %[ftmp9], %[ftmp0], %[ftmp1] \n\t"
"pand %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t"
@@ -393,7 +403,7 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
"packsswh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
MMI_LI(%[tmp0], 0x72)
- "mtc1 %[tmp0], %[ftmp11] \n\t"
+ "dmtc1 %[tmp0], %[ftmp11] \n\t"
"pshufh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
"pshufh %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
"pshufh %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
@@ -413,13 +423,12 @@ void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) {
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
- [ftmp12]"=&f"(ftmp[12]),
- [tmp0]"=&r"(tmp[0]),
- [ip]"+&r"(input)
- : [op]"r"(output),
- [ff_pw_01]"f"(ff_pw_01), [pitch]"r"((mips_reg)pitch),
- [ff_pw_03]"f"(ff_pw_03), [ff_pw_mask]"f"(ff_pw_mask),
- [ff_ph_01]"f"(ff_ph_01)
+ [ftmp12]"=&f"(ftmp[12]), [ff_pw_mask]"=&f"(ff_pw_mask),
+ [tmp0]"=&r"(tmp[0]), [ff_pw_01]"=&f"(ff_pw_01),
+ [ip]"+&r"(input), [ff_pw_03]"=&f"(ff_pw_03),
+ [ff_ph_01]"=&f"(ff_ph_01)
+ : [op]"r"(output), [pitch]"r"((mips_reg)pitch)
: "memory"
);
+ /* clang-format on */
}
diff --git a/vp8/encoder/mips/mmi/vp8_quantize_mmi.c b/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
index 69a9e5e01..1986444aa 100644
--- a/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
+++ b/vp8/encoder/mips/mmi/vp8_quantize_mmi.c
@@ -42,16 +42,17 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
double ftmp[13];
uint64_t tmp[1];
- DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL };
- int eob = 0;
+ int64_t eob = 0;
+ double ones;
__asm__ volatile(
// loop 0 ~ 7
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "pcmpeqh %[ones], %[ones], %[ones] \n\t"
"gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t"
- "li %[tmp0], 0x0f \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x0f \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t"
"gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t"
@@ -165,18 +166,18 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
"gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t"
"gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t"
- "li %[tmp0], 0x10 \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0x10 \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
- "li %[tmp0], 0xaa \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0xaa \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
"pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
- "li %[tmp0], 0xffff \n\t"
- "mtc1 %[tmp0], %[ftmp9] \n\t"
+ "dli %[tmp0], 0xffff \n\t"
+ "dmtc1 %[tmp0], %[ftmp9] \n\t"
"pand %[ftmp10], %[ftmp10], %[ftmp9] \n\t"
"gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t"
"gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t"
@@ -184,15 +185,15 @@ void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
[ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
[ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
[ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
- [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
+ [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
+ [tmp0] "=&r"(tmp[0]), [ones] "=&f"(ones)
: [coeff_ptr] "r"((mips_reg)coeff_ptr),
[qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
[dequant_ptr] "r"((mips_reg)dequant_ptr),
[round_ptr] "r"((mips_reg)round_ptr),
[quant_ptr] "r"((mips_reg)quant_ptr),
[dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
- [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob),
- [ones] "f"(ones)
+ [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob)
: "memory");
*d->eob = eob;
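
Two independent fixes here beyond the dli/dmtc1 conversion, sketched below. First, the all-ones mask no longer needs a memory constant at all: pcmpeqh compares packed halfwords, and any register compared with itself yields 0xffff in every lane. Second, eob widens from int to int64_t because gssdlc1/gssdrc1 store a full doubleword through %[eob]; with a 4-byte int, those stores wrote past the variable.

    "pcmpeqh %[ones], %[ones], %[ones]   \n\t"  /* x == x in every lane -> all bits set */

    int64_t eob = 0;   /* gssdlc1/gssdrc1 store 8 bytes at 0x00..0x07(%[eob]) */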
diff --git a/vpx_dsp/mips/sad_mmi.c b/vpx_dsp/mips/sad_mmi.c
index 5dee3164b..eaca4773f 100644
--- a/vpx_dsp/mips/sad_mmi.c
+++ b/vpx_dsp/mips/sad_mmi.c
@@ -364,6 +364,7 @@ static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -383,6 +384,7 @@ static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -405,7 +407,9 @@ static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ mips_reg l_second_pred = (mips_reg)second_pred;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -424,11 +428,12 @@ static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref),
- [second_pred]"+&r"((mips_reg)second_pred),
+ [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -450,6 +455,7 @@ static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -469,6 +475,7 @@ static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -493,7 +500,9 @@ static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ mips_reg l_second_pred = (mips_reg)second_pred;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -512,11 +521,12 @@ static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref),
- [second_pred]"+&r"((mips_reg)second_pred),
+ [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -539,6 +549,7 @@ static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -558,6 +569,7 @@ static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -586,7 +598,9 @@ static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
unsigned int sad;
double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
mips_reg l_counter = counter;
+ mips_reg l_second_pred = (mips_reg)second_pred;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
"1: \n\t"
@@ -605,11 +619,12 @@ static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
[src]"+&r"(src), [ref]"+&r"(ref),
- [second_pred]"+&r"((mips_reg)second_pred),
+ [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -632,6 +647,7 @@ static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t"
@@ -651,6 +667,7 @@ static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -679,7 +696,9 @@ static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
unsigned int sad;
double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter;
+ mips_reg l_second_pred = (mips_reg)second_pred;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t"
@@ -697,11 +716,12 @@ static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
"mfc1 %[sad], %[ftmp3] \n\t"
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
- [second_pred]"+&r"((mips_reg)second_pred),
+ [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -724,6 +744,7 @@ static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t"
@@ -743,6 +764,7 @@ static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
@@ -767,7 +789,9 @@ static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
unsigned int sad;
double ftmp1, ftmp2, ftmp3;
mips_reg l_counter = counter;
+ mips_reg l_second_pred = (mips_reg)second_pred;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
"1: \n\t"
@@ -785,11 +809,12 @@ static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
"mfc1 %[sad], %[ftmp3] \n\t"
: [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
[counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
- [second_pred]"+&r"((mips_reg)second_pred),
+ [second_pred]"+&r"(l_second_pred),
[sad]"=&r"(sad)
: [src_stride]"r"((mips_reg)src_stride),
[ref_stride]"r"((mips_reg)ref_stride)
);
+ /* clang-format on */
return sad;
}
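
The recurring fix in this file: [second_pred]"+&r"((mips_reg)second_pred) applied a cast inside a read-write output operand, but a cast yields an rvalue and "+r" needs an lvalue the compiler can write back to, so newer GCC rejects it. The patch copies the pointer into a register-width local first:

    mips_reg l_second_pred = (mips_reg)second_pred;  /* lvalue of register width */
    __asm__ volatile (
        /* ... */
        : [second_pred]"+&r"(l_second_pred)          /* valid read-write operand */
        /* ... */
    );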
diff --git a/vpx_dsp/mips/variance_mmi.c b/vpx_dsp/mips/variance_mmi.c
index 29e52a1a8..c2adcfa01 100644
--- a/vpx_dsp/mips/variance_mmi.c
+++ b/vpx_dsp/mips/variance_mmi.c
@@ -414,6 +414,7 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -496,6 +497,7 @@ static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / (64 * high));
}
@@ -519,6 +521,7 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -577,6 +580,7 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
[sse]"r"(sse)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / 2048);
}
@@ -590,6 +594,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -653,6 +658,7 @@ static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / (32 * high));
}
@@ -676,6 +682,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -729,6 +736,7 @@ static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / (16 * high));
}
@@ -753,6 +761,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -801,6 +810,7 @@ static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / (8 * high));
}
@@ -825,6 +835,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp10] \n\t"
@@ -872,6 +883,7 @@ static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
: "memory"
);
+ /* clang-format on */
return *sse - (((int64_t)sum * sum) / (4 * high));
}
@@ -894,6 +906,7 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -925,6 +938,7 @@ static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
+ /* clang-format on */
return *sse;
}
@@ -947,6 +961,7 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
*sse = 0;
+ /* clang-format off */
__asm__ volatile (
"li %[tmp0], 0x20 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
@@ -978,6 +993,7 @@ static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
[high]"r"(&high), [sse]"r"(sse)
: "memory"
);
+ /* clang-format on */
return *sse;
}
@@ -1021,22 +1037,39 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
uint8_t *temp2_ptr = temp2;
mips_reg l_counter = counter;
double ftmp[15];
+ double ff_ph_40, mask;
+ double filter_x0, filter_x1, filter_y0, filter_y1;
mips_reg tmp[2];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
- DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
+ uint64_t x0, x1, y0, y1, all;
const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset];
+ x0 = (uint64_t)filter_x[0];
+ x1 = (uint64_t)filter_x[1];
+ y0 = (uint64_t)filter_y[0];
+ y1 = (uint64_t)filter_y[1];
+ all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ MMI_MTC1(%[all], %[ftmp14])
+ "punpcklbh %[ftmp14], %[ftmp14], %[ftmp0] \n\t"
+ "pshufh %[filter_x0], %[ftmp14], %[ftmp0] \n\t"
+ MMI_LI(%[tmp0], 0x10)
+ MMI_MTC1(%[tmp0], %[mask])
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_x1], %[ftmp14], %[ftmp0] \n\t"
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_y0], %[ftmp14], %[ftmp0] \n\t"
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_y1], %[ftmp14], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp14])
- "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t"
- "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t"
- "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t"
- "pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t"
-
+ MMI_LI(%[tmp0], 0x0040004000400040)
+ MMI_MTC1(%[tmp0], %[ff_ph_40])
+ MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
+ MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[15]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
@@ -1072,15 +1105,13 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
[tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
- [counter]"+&r"(l_counter)
- : [filter_x0] "f"((uint64_t)filter_x[0]),
- [filter_x1] "f"((uint64_t)filter_x[1]),
- [filter_y0] "f"((uint64_t)filter_y[0]),
- [filter_y1] "f"((uint64_t)filter_y[1]),
- [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
- [mask] "f"(mask)
+ [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
+ [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
+ [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
+ : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
: "memory"
);
+ /* clang-format on */
}
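
The four bilinear taps, formerly four separate (uint64_t)filter[...] "f" inputs, are now packed into one GPR word and unpacked inside the asm: punpcklbh against the zero register widens the four bytes to halfwords, pshufh with selector 0 broadcasts lane 0, and each ssrld by 16 (the 0x10 loaded into %[mask]) brings the next tap into lane 0. A worked example, assuming x taps {112, 16} and y taps {96, 32} from the bilinear_filters table:

    all = 112 | 16 << 8 | 96 << 16 | 32 << 24;   /* = 0x20601070 */
    /* after MMI_MTC1 + punpcklbh with zero:
     *   ftmp14 halfword lanes = { 0x0070, 0x0010, 0x0060, 0x0020 }
     * pshufh(..., 0) broadcasts lane 0:
     *   filter_x0 = 0x0070007000700070   (112 in each 16-bit lane)
     * ssrld by 16, then pshufh again:
     *   filter_x1 = 0x0010001000100010   (16 in each lane), and so on. */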
#define SUBPIX_VAR16XN(H) \
@@ -1105,19 +1136,38 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
mips_reg l_counter = counter;
double ftmp[15];
mips_reg tmp[2];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
- DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
+ double ff_ph_40, mask;
+ uint64_t x0, x1, y0, y1, all;
+ double filter_x0, filter_x1, filter_y0, filter_y1;
const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset];
+ x0 = (uint64_t)filter_x[0];
+ x1 = (uint64_t)filter_x[1];
+ y0 = (uint64_t)filter_y[0];
+ y1 = (uint64_t)filter_y[1];
+ all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ MMI_MTC1(%[all], %[ftmp14])
+ "punpcklbh %[ftmp14], %[ftmp14], %[ftmp0] \n\t"
+ "pshufh %[filter_x0], %[ftmp14], %[ftmp0] \n\t"
+ MMI_LI(%[tmp0], 0x10)
+ MMI_MTC1(%[tmp0], %[mask])
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_x1], %[ftmp14], %[ftmp0] \n\t"
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_y0], %[ftmp14], %[ftmp0] \n\t"
+ "ssrld %[ftmp14], %[ftmp14], %[mask] \n\t"
+ "pshufh %[filter_y1], %[ftmp14], %[ftmp0] \n\t"
+ "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp14])
- "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t"
- "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t"
- "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t"
- "pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t"
+ MMI_LI(%[tmp0], 0x0040004000400040)
+ MMI_MTC1(%[tmp0], %[ff_ph_40])
+ MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
+ MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[7]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
@@ -1154,15 +1204,13 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
[ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
[ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
[tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
- [counter]"+&r"(l_counter)
- : [filter_x0] "f"((uint64_t)filter_x[0]),
- [filter_x1] "f"((uint64_t)filter_x[1]),
- [filter_y0] "f"((uint64_t)filter_y[0]),
- [filter_y1] "f"((uint64_t)filter_y[1]),
- [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
- [mask] "f"(mask)
+ [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
+ [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
+ [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
+ : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
: "memory"
);
+ /* clang-format on */
}
#define SUBPIX_VAR8XN(H) \
@@ -1188,19 +1236,38 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
mips_reg l_counter = counter;
double ftmp[7];
mips_reg tmp[2];
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
- DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL };
+ double ff_ph_40, mask;
+ uint64_t x0, x1, y0, y1, all;
+ double filter_x0, filter_x1, filter_y0, filter_y1;
const uint8_t *filter_x = bilinear_filters[x_offset];
const uint8_t *filter_y = bilinear_filters[y_offset];
+ x0 = (uint64_t)filter_x[0];
+ x1 = (uint64_t)filter_x[1];
+ y0 = (uint64_t)filter_y[0];
+ y1 = (uint64_t)filter_y[1];
+ all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
+ /* clang-format off */
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ MMI_MTC1(%[all], %[ftmp6])
+ "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
+ "pshufh %[filter_x0], %[ftmp6], %[ftmp0] \n\t"
+ MMI_LI(%[tmp0], 0x10)
+ MMI_MTC1(%[tmp0], %[mask])
+ "ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
+ "pshufh %[filter_x1], %[ftmp6], %[ftmp0] \n\t"
+ "ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
+ "pshufh %[filter_y0], %[ftmp6], %[ftmp0] \n\t"
+ "ssrld %[ftmp6], %[ftmp6], %[mask] \n\t"
+ "pshufh %[filter_y1], %[ftmp6], %[ftmp0] \n\t"
+ "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_LI(%[tmp0], 0x07)
MMI_MTC1(%[tmp0], %[ftmp6])
- "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t"
- "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t"
- "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t"
- "pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t"
+ MMI_LI(%[tmp0], 0x0040004000400040)
+ MMI_MTC1(%[tmp0], %[ff_ph_40])
+ MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
+ MMI_MTC1(%[tmp0], %[mask])
// fdata3: fdata3[0] ~ fdata3[3]
VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
@@ -1232,15 +1299,14 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
: [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
[ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
[ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr),
- [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter)
- : [filter_x0] "f"((uint64_t)filter_x[0]),
- [filter_x1] "f"((uint64_t)filter_x[1]),
- [filter_y0] "f"((uint64_t)filter_y[0]),
- [filter_y1] "f"((uint64_t)filter_y[1]),
- [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40),
- [mask] "f"(mask)
+ [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter),
+ [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
+ [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
+ [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
+ : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
: "memory"
);
+ /* clang-format on */
}
#define SUBPIX_VAR4XN(H) \