summary | refs | log | tree | commit | diff
path: root/vp9/common
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/common')
-rw-r--r-- vp9/common/vp9_idct.c | 78
-rw-r--r-- vp9/common/vp9_idct.h | 30
2 files changed, 54 insertions, 54 deletions
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index b2c78c790..1a3fefc5f 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -11,39 +11,9 @@
#include <math.h>
#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
-
-#if CONFIG_EMULATE_HARDWARE
-// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
-// non-normative method to handle overflows. A stream that causes
-// overflows in the inverse transform is considered invalid in VP9,
-// and a hardware implementer is free to choose any reasonable
-// method to handle overflows. However to aid in hardware
-// verification they can use a specific implementation of the
-// WRAPLOW() macro below that is identical to their intended
-// hardware implementation (and also use configure options to trigger
-// the C-implementation of the transform).
-//
-// The particular WRAPLOW implementation below performs strict
-// overflow wrapping to match common hardware implementations.
-// bd of 8 uses trans_low with 16bits, need to remove 16bits
-// bd of 10 uses trans_low with 18bits, need to remove 14bits
-// bd of 12 uses trans_low with 20bits, need to remove 12bits
-// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
-#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
-#else
-#define WRAPLOW(x, bd) ((int32_t)(x))
-#endif // CONFIG_EMULATE_HARDWARE
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
- int bd) {
- trans = WRAPLOW(trans, bd);
- return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
+#include "vp9/common/vp9_systemdependent.h"
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
trans = WRAPLOW(trans, 8);
@@ -1540,7 +1510,7 @@ void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
}
}
-static void highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
+void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step[4];
tran_high_t temp1, temp2;
(void) bd;
@@ -1571,7 +1541,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
// Rows
for (i = 0; i < 4; ++i) {
- highbd_idct4(input, outptr, bd);
+ vp9_highbd_idct4(input, outptr, bd);
input += 4;
outptr += 4;
}
@@ -1580,7 +1550,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- highbd_idct4(temp_in, temp_out, bd);
+ vp9_highbd_idct4(temp_in, temp_out, bd);
for (j = 0; j < 4; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
@@ -1607,7 +1577,7 @@ void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
}
}
-static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
+void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
// stage 1
@@ -1625,7 +1595,7 @@ static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 2 & stage 3 - even half
- highbd_idct4(step1, step1, bd);
+ vp9_highbd_idct4(step1, step1, bd);
// stage 2 - odd half
step2[4] = WRAPLOW(step1[4] + step1[5], bd);
@@ -1662,7 +1632,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows.
for (i = 0; i < 8; ++i) {
- highbd_idct8(input, outptr, bd);
+ vp9_highbd_idct8(input, outptr, bd);
input += 8;
outptr += 8;
}
@@ -1671,7 +1641,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- highbd_idct8(temp_in, temp_out, bd);
+ vp9_highbd_idct8(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
@@ -1735,9 +1705,9 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
const highbd_transform_2d IHT_4[] = {
- { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0
- { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1
- { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
+ { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0
+ { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1
+ { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
{ highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1844,9 +1814,9 @@ static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
}
static const highbd_transform_2d HIGH_IHT_8[] = {
- { highbd_idct8, highbd_idct8 }, // DCT_DCT = 0
- { highbd_iadst8, highbd_idct8 }, // ADST_DCT = 1
- { highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2
+ { vp9_highbd_idct8, vp9_highbd_idct8 }, // DCT_DCT = 0
+ { highbd_iadst8, vp9_highbd_idct8 }, // ADST_DCT = 1
+ { vp9_highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2
{ highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3
};
@@ -1889,7 +1859,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows.
// Only the first 4 rows have non-zero coefs.
for (i = 0; i < 4; ++i) {
- highbd_idct8(input, outptr, bd);
+ vp9_highbd_idct8(input, outptr, bd);
input += 8;
outptr += 8;
}
@@ -1897,7 +1867,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- highbd_idct8(temp_in, temp_out, bd);
+ vp9_highbd_idct8(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
@@ -1905,7 +1875,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
}
}
-static void highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) {
+void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
(void) bd;
@@ -2081,7 +2051,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows.
for (i = 0; i < 16; ++i) {
- highbd_idct16(input, outptr, bd);
+ vp9_highbd_idct16(input, outptr, bd);
input += 16;
outptr += 16;
}
@@ -2090,7 +2060,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- highbd_idct16(temp_in, temp_out, bd);
+ vp9_highbd_idct16(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
@@ -2270,9 +2240,9 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output,
}
static const highbd_transform_2d HIGH_IHT_16[] = {
- { highbd_idct16, highbd_idct16 }, // DCT_DCT = 0
- { highbd_iadst16, highbd_idct16 }, // ADST_DCT = 1
- { highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2
+ { vp9_highbd_idct16, vp9_highbd_idct16 }, // DCT_DCT = 0
+ { highbd_iadst16, vp9_highbd_idct16 }, // ADST_DCT = 1
+ { vp9_highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2
{ highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3
};
@@ -2315,7 +2285,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) {
- highbd_idct16(input, outptr, bd);
+ vp9_highbd_idct16(input, outptr, bd);
input += 16;
outptr += 16;
}
@@ -2324,7 +2294,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i];
- highbd_idct16(temp_in, temp_out, bd);
+ vp9_highbd_idct16(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index fcabaa34f..1d8836cf3 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -118,6 +118,28 @@ typedef struct {
} highbd_transform_2d;
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EMULATE_HARDWARE
+// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
+// non-normative method to handle overflows. A stream that causes
+// overflows in the inverse transform is considered invalid in VP9,
+// and a hardware implementer is free to choose any reasonable
+// method to handle overflows. However to aid in hardware
+// verification they can use a specific implementation of the
+// WRAPLOW() macro below that is identical to their intended
+// hardware implementation (and also use configure options to trigger
+// the C-implementation of the transform).
+//
+// The particular WRAPLOW implementation below performs strict
+// overflow wrapping to match common hardware implementations.
+// bd of 8 uses trans_low with 16bits, need to remove 16bits
+// bd of 10 uses trans_low with 18bits, need to remove 14bits
+// bd of 12 uses trans_low with 20bits, need to remove 12bits
+// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
+#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
+#else
+#define WRAPLOW(x, bd) (x)
+#endif // CONFIG_EMULATE_HARDWARE
+
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -137,6 +159,9 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd);
+void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd);
+void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd);
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -153,6 +178,11 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
+static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
+ int bd) {
+ trans = WRAPLOW(trans, bd);
+ return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
+}
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"