summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJingning Han <jingning@google.com>2012-08-02 09:07:33 -0700
committerJingning Han <jingning@google.com>2012-08-03 12:02:07 -0700
commitfcbff9ee04f5b67ce79fd329333c8b1970d9318d (patch)
tree1e14daa8f99e3ea35df579e55a8b7ec75ad75ef3
parente6de9c2e5ddfe36324fbbb4f57971245066a32cf (diff)
downloadlibvpx-fcbff9ee04f5b67ce79fd329333c8b1970d9318d.tar
libvpx-fcbff9ee04f5b67ce79fd329333c8b1970d9318d.tar.gz
libvpx-fcbff9ee04f5b67ce79fd329333c8b1970d9318d.tar.bz2
libvpx-fcbff9ee04f5b67ce79fd329333c8b1970d9318d.zip
Replacing the 8x8 DCT with 8x8 ADST/DCT for I8x8
Fixed the code review comments. Under the htrans8x8 experiment, the 8X8 DCT in the I8X8 mode is replaced with a combination of 8X8 ADST and DCT. Overall coding gains with the htrans8x8 experiment are: derf: 0.486, std-hd: 1.040, hd: 1.063, yt: 0.506. Note that part of the gain comes from bigger transforms (8x8 instead of 4x4) and part comes from replacing the DCT with the ADST. Change-Id: I92ca6bbfce11b4165d612b81d9adfad4d010c775
-rwxr-xr-xconfigure2
-rw-r--r--vp8/common/blockd.h71
-rw-r--r--vp8/common/default_coef_probs.h2
-rw-r--r--vp8/common/entropy.h4
-rw-r--r--vp8/common/idctllm.c152
-rw-r--r--vp8/decoder/decodframe.c37
-rw-r--r--vp8/decoder/dequantize.c45
-rw-r--r--vp8/decoder/detokenize.c6
-rw-r--r--vp8/encoder/dct.c147
-rw-r--r--vp8/encoder/encodeintra.c38
-rw-r--r--vp8/encoder/rdopt.c35
-rw-r--r--vp8/encoder/tokenize.c4
12 files changed, 452 insertions, 91 deletions
diff --git a/configure b/configure
index 75b93f4d1..fc998d05e 100755
--- a/configure
+++ b/configure
@@ -223,8 +223,8 @@ EXPERIMENT_LIST="
pred_filter
lossless
hybridtransform
+ hybridtransform8x8
switchable_interp
- htrans8x8
tx16x16
"
CONFIG_LIST="
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 64fc06546..3c43a1e9a 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -54,7 +54,6 @@ typedef struct {
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
-
typedef char ENTROPY_CONTEXT;
typedef struct {
ENTROPY_CONTEXT y1[4];
@@ -179,6 +178,50 @@ typedef enum {
B_MODE_COUNT
} B_PREDICTION_MODE;
+#if CONFIG_HYBRIDTRANSFORM8X8
+// Convert an MB_PREDICTION_MODE into the corresponding B_PREDICTION_MODE.
+//
+// Every intra mode from DC_PRED through D63_PRED has an explicit mapping.
+// Any other value trips the debug assert; when assert() is compiled out
+// (NDEBUG) the function returns B_DC_PRED instead of an indeterminate value.
+static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
+  // Defined fallback for the default branch below. txfm_map() sends
+  // B_DC_PRED to its default case, i.e. DCT_DCT.
+  B_PREDICTION_MODE b_mode = B_DC_PRED;
+  switch (mode) {
+    case DC_PRED:
+      b_mode = B_DC_PRED;
+      break;
+    case V_PRED:
+      b_mode = B_VE_PRED;
+      break;
+    case H_PRED:
+      b_mode = B_HE_PRED;
+      break;
+    case TM_PRED:
+      b_mode = B_TM_PRED;
+      break;
+    case D45_PRED:
+      b_mode = B_LD_PRED;
+      break;
+    case D135_PRED:
+      b_mode = B_RD_PRED;
+      break;
+    case D117_PRED:
+      b_mode = B_VR_PRED;
+      break;
+    case D153_PRED:
+      b_mode = B_HD_PRED;
+      break;
+    case D27_PRED:
+      b_mode = B_VL_PRED;
+      break;
+    case D63_PRED:
+      b_mode = B_HU_PRED;
+      break;
+    default :
+      // for debug purpose, to be removed after full testing
+      assert(0);
+      break;
+  }
+  return b_mode;
+}
+#endif
+
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
@@ -389,6 +432,32 @@ typedef struct MacroBlockD {
} MACROBLOCKD;
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+// Transform mapping: choose the hybrid transform type for block b from its
+// prediction mode and store it in b->bmi.as_mode.tx_type.
+//   B_TM_PRED, B_RD_PRED            -> ADST_ADST
+//   B_VE_PRED, B_VR_PRED            -> ADST_DCT
+//   B_HE_PRED, B_HD_PRED, B_HU_PRED -> DCT_ADST
+//   anything else                   -> DCT_DCT
+static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) {
+  if (bmode == B_TM_PRED || bmode == B_RD_PRED) {
+    b->bmi.as_mode.tx_type = ADST_ADST;
+  } else if (bmode == B_VE_PRED || bmode == B_VR_PRED) {
+    b->bmi.as_mode.tx_type = ADST_DCT;
+  } else if (bmode == B_HE_PRED || bmode == B_HD_PRED ||
+             bmode == B_HU_PRED) {
+    b->bmi.as_mode.tx_type = DCT_ADST;
+  } else {
+    b->bmi.as_mode.tx_type = DCT_DCT;
+  }
+}
+#endif
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
index dfb0e5ea7..940e971b7 100644
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -434,7 +434,7 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8]
{ 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
}
}
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
,
{ /* block Type 3 */
{ /* Coeff Band 0 */
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 4497a3d47..190221c16 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -60,9 +60,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
/* Coefficients are predicted via a 3-dimensional probability table. */
/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
-
#define BLOCK_TYPES 4
-#if CONFIG_HTRANS8X8
+
+#if CONFIG_HYBRIDTRANSFORM8X8
#define BLOCK_TYPES_8X8 4
#else
#define BLOCK_TYPES_8X8 3
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index e549fe098..616e4938e 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -35,6 +35,8 @@ static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int rounding = 0;
+// TODO: these transforms can be further converted into integer forms
+// for complexity optimization
#if CONFIG_HYBRIDTRANSFORM
float idct_4[16] = {
0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099,
@@ -51,11 +53,52 @@ float iadst_4[16] = {
};
#endif
+#if CONFIG_HYBRIDTRANSFORM8X8
+float idct_8[64] = {  // 8x8 table, 8 floats per row; vp8_iht8x8llm_c uses it for the non-ADST direction(s)
+ 0.353553390593274, 0.490392640201615, 0.461939766255643, 0.415734806151273,
+ 0.353553390593274, 0.277785116509801, 0.191341716182545, 0.097545161008064,
+ 0.353553390593274, 0.415734806151273, 0.191341716182545, -0.097545161008064,
+ -0.353553390593274, -0.490392640201615, -0.461939766255643, -0.277785116509801,
+ 0.353553390593274, 0.277785116509801, -0.191341716182545, -0.490392640201615,
+ -0.353553390593274, 0.097545161008064, 0.461939766255643, 0.415734806151273,
+ 0.353553390593274, 0.097545161008064, -0.461939766255643, -0.277785116509801,
+ 0.353553390593274, 0.415734806151273, -0.191341716182545, -0.490392640201615,
+ 0.353553390593274, -0.097545161008064, -0.461939766255643, 0.277785116509801,
+ 0.353553390593274, -0.415734806151273, -0.191341716182545, 0.490392640201615,
+ 0.353553390593274, -0.277785116509801, -0.191341716182545, 0.490392640201615,
+ -0.353553390593274, -0.097545161008064, 0.461939766255643, -0.415734806151273,
+ 0.353553390593274, -0.415734806151273, 0.191341716182545, 0.097545161008064,
+ -0.353553390593274, 0.490392640201615, -0.461939766255643, 0.277785116509801,
+ 0.353553390593274, -0.490392640201615, 0.461939766255643, -0.415734806151273,
+ 0.353553390593274, -0.277785116509801, 0.191341716182545, -0.097545161008064
+};
+
+float iadst_8[64] = {  // 8x8 table, 8 floats per row; vp8_iht8x8llm_c uses it for the ADST direction(s)
+ 0.089131608307533, 0.255357107325376, 0.387095214016349, 0.466553967085785,
+ 0.483002021635509, 0.434217976756762, 0.326790388032145, 0.175227946595735,
+ 0.175227946595735, 0.434217976756762, 0.466553967085785, 0.255357107325376,
+ -0.089131608307533, -0.387095214016348, -0.483002021635509, -0.326790388032145,
+ 0.255357107325376, 0.483002021635509, 0.175227946595735, -0.326790388032145,
+ -0.466553967085785, -0.089131608307533, 0.387095214016349, 0.434217976756762,
+ 0.326790388032145, 0.387095214016349, -0.255357107325376, -0.434217976756762,
+ 0.175227946595735, 0.466553967085786, -0.089131608307534, -0.483002021635509,
+ 0.387095214016349, 0.175227946595735, -0.483002021635509, 0.089131608307533,
+ 0.434217976756762, -0.326790388032145, -0.255357107325377, 0.466553967085785,
+ 0.434217976756762, -0.089131608307533, -0.326790388032145, 0.483002021635509,
+ -0.255357107325376, -0.175227946595735, 0.466553967085785, -0.387095214016348,
+ 0.466553967085785, -0.326790388032145, 0.089131608307533, 0.175227946595735,
+ -0.387095214016348, 0.483002021635509, -0.434217976756762, 0.255357107325376,
+ 0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348,
+ 0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532
+};
+#endif
+
#if CONFIG_HYBRIDTRANSFORM
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
int i, j, k;
float bufa[16], bufb[16]; // buffers are for floating-point test purpose
- // the implementation could be simplified in conjunction with integer transform
+ // the implementation could be simplified in
+ // conjunction with integer transform
short *ip = input;
short *op = output;
int shortpitch = pitch >> 1;
@@ -158,6 +201,113 @@ void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
}
#endif
+#if CONFIG_HYBRIDTRANSFORM8X8
+// Inverse 8x8 hybrid transform, floating-point implementation.
+//
+// Reads 64 coefficients from input as a dense 8x8 block, applies the
+// vertical pass followed by the horizontal pass, and writes 8 rows of 8
+// rounded values to output, stepping (pitch >> 1) shorts between rows.
+// tx_type selects the matrix per direction: iadst_8 vertically for
+// ADST_ADST / ADST_DCT and horizontally for ADST_ADST / DCT_ADST;
+// idct_8 in every other case.
+void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  // float scratch buffers are for floating-point test purpose; the
+  // implementation could be simplified in conjunction with integer transform
+  float work[64], mid[64];
+  const int out_stride = pitch >> 1;
+  const float *vert;
+  const float *horz;
+  int r, c, n;
+
+  // matrices for the vertical and horizontal transforms
+  vert = (tx_type == ADST_ADST || tx_type == ADST_DCT) ? iadst_8 : idct_8;
+  horz = (tx_type == ADST_ADST || tx_type == DCT_ADST) ? iadst_8 : idct_8;
+
+  // load and convert the coefficient array into floating-point
+  for (n = 0; n < 64; n++) {
+    work[n] = (float)input[n];
+  }
+
+  // vertical transformation: mid[r][c] = sum over n of vert[r][n] * work[n][c]
+  for (r = 0; r < 8; r++) {
+    for (c = 0; c < 8; c++) {
+      float *acc = &mid[r * 8 + c];
+      *acc = 0;
+      for (n = 0; n < 8; n++) {
+        *acc += vert[r * 8 + n] * work[n * 8 + c];
+      }
+    }
+  }
+
+  // horizontal transformation:
+  // work[r][c] = sum over n of mid[r][n] * horz[c][n]
+  for (r = 0; r < 8; r++) {
+    for (c = 0; c < 8; c++) {
+      float *acc = &work[r * 8 + c];
+      *acc = 0;
+      for (n = 0; n < 8; n++) {
+        *acc += mid[r * 8 + n] * horz[c * 8 + n];
+      }
+    }
+  }
+
+  // scale by 1/8, round symmetrically about zero with a 0.49 offset, and
+  // store as short at the caller's row stride
+  for (r = 0; r < 8; r++) {
+    short *dst = output + r * out_stride;
+    for (c = 0; c < 8; c++) {
+      const float v = work[r * 8 + c];
+      if (v > 0) {
+        dst[c] = (short)(v / 8 + 0.49);
+      } else {
+        dst[c] = -(short)(-v / 8 + 0.49);
+      }
+    }
+  }
+}
+#endif
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
int i;
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index d50e1dfb3..0588d002b 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -46,7 +46,6 @@ int dec_debug = 0;
#define COEFCOUNT_TESTING
-
static int merge_index(int v, int n, int modulus) {
int max1 = (n - 1 - modulus / 2) / modulus + 1;
if (v < max1) v = v * modulus + modulus / 2;
@@ -260,7 +259,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
}
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
}
@@ -336,29 +335,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
- if(active_ht) {
- switch(b_mode) {
- case B_TM_PRED :
- case B_RD_PRED :
- b->bmi.as_mode.tx_type = ADST_ADST;
- break;
-
- case B_VE_PRED :
- case B_VR_PRED :
- b->bmi.as_mode.tx_type = ADST_DCT;
- break ;
-
- case B_HE_PRED :
- case B_HD_PRED :
- case B_HU_PRED :
- b->bmi.as_mode.tx_type = DCT_ADST;
- break;
-
- default :
- b->bmi.as_mode.tx_type = DCT_DCT;
- break;
- }
- }
+ if(active_ht)
+ txfm_map(b, b_mode);
} // loop over 4x4 blocks
}
#endif
@@ -392,7 +370,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
int i8x8mode;
BLOCKD *b;
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
int idx = (ib & 0x02) ? (ib + 2) : ib;
short *q = xd->block[idx].qcoeff;
@@ -410,8 +388,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict)
(b, i8x8mode, b->predictor);
-#if CONFIG_HTRANS8X8
- vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
+#if CONFIG_HYBRIDTRANSFORM8X8
+ txfm_map(b, pred_mode_conv(i8x8mode));
+ vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,
+ q, dq, pre, dst, 16, stride);
+ // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
q += 64;
#else
for (j = 0; j < 4; j++) {
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 655409176..bf44fd61a 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -79,6 +79,51 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
}
#endif
+#if CONFIG_HYBRIDTRANSFORM8X8
+// Dequantize an 8x8 coefficient block in place, run the inverse hybrid
+// transform selected by tx_type, add the residual to the predictor and
+// store the sum, clamped to [0, 255], at dest.
+//   input - 64 coefficients; scaled in place, then cleared to zero
+//   dq    - dq[0] scales input[0], dq[1] scales the other 63 coefficients
+//   pred  - predictor pixels, `pitch` elements between rows
+//   dest  - output pixels, `stride` elements between rows
+void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
+                                   unsigned char *pred, unsigned char *dest,
+                                   int pitch, int stride) {
+  short residual[64];
+  int quad, row, col, n;
+
+  input[0] = dq[0] * input[0];
+  for (n = 1; n < 64; n++) {
+    input[n] = dq[1] * input[n];
+  }
+
+  // a pitch of 16 makes the transform write rows 8 shorts apart (16 >> 1),
+  // i.e. a dense 8x8 residual
+  vp8_iht8x8llm_c(input, residual, 16, tx_type);
+
+  vpx_memset(input, 0, 128);
+
+  // reconstruct the four 4x4 quadrants in raster order
+  for (quad = 0; quad < 4; quad++) {
+    const int row0 = (quad / 2) * 4;
+    const int col0 = (quad % 2) * 4;
+
+    for (row = 0; row < 4; row++) {
+      const short *res = residual + (row0 + row) * 8 + col0;
+      const unsigned char *p = pred + (row0 + row) * pitch + col0;
+      unsigned char *d = dest + (row0 + row) * stride + col0;
+
+      for (col = 0; col < 4; col++) {
+        int v = res[col] + p[col];
+
+        if (v < 0) {
+          v = 0;
+        }
+        if (v > 255) {
+          v = 255;
+        }
+
+        d[col] = (unsigned char)v;
+      }
+    }
+  }
+}
+#endif
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride) {
short output[16];
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index c93b8e9c5..5f9768d41 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -473,7 +473,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB);
INT16 *qcoeff_ptr = &xd->qcoeff[0];
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24;
if (xd->mode_info_context->mbmi.mode != B_PRED &&
xd->mode_info_context->mbmi.mode != SPLITMV &&
@@ -506,7 +506,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
else
seg_eob = 64;
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
for (i = 0; i < bufthred ; i += 4) {
#else
for (i = 0; i < 24; i += 4) {
@@ -528,7 +528,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
qcoeff_ptr += 64;
}
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
type = PLANE_TYPE_UV;
seg_eob = 16;
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index ba2a692d1..ad5258552 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -17,6 +17,8 @@
#include "vp8/common/blockd.h"
+// TODO: these transforms can be converted into integer forms to reduce
+// the complexity
float dct_4[16] = {
0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
@@ -32,6 +34,45 @@ float adst_4[16] = {
};
#endif
+#if CONFIG_HYBRIDTRANSFORM8X8
+float dct_8[64] = {  // 8x8 table, 8 floats per row; vp8_fht8x8_c uses it for the non-ADST direction(s)
+ 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
+ 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
+ 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
+ -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
+ 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
+ -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
+ 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
+ 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
+ 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
+ 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
+ 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
+ -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
+ 0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
+ -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
+ 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
+ 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
+};
+
+float adst_8[64] = {  // 8x8 table, 8 floats per row; vp8_fht8x8_c uses it for the ADST direction(s)
+ 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
+ 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
+ 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
+ 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
+ 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
+ -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
+ 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
+ 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
+ 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
+ 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
+ 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
+ -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
+ 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
+ -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
+ 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
+ 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
+};
+#endif
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
@@ -394,6 +435,112 @@ void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
}
#endif
+#if CONFIG_HYBRIDTRANSFORM8X8
+// Forward 8x8 hybrid transform, floating-point implementation.
+//
+// Reads an 8x8 residual from input, whose rows are (pitch / 2) shorts apart,
+// applies the vertical pass followed by the horizontal pass, and writes the
+// 64 rounded coefficients densely (8 per row) to output.
+// tx_type selects the matrix per direction: adst_8 vertically for
+// ADST_ADST / ADST_DCT and horizontally for ADST_ADST / DCT_ADST;
+// dct_8 in every other case.
+void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  // float scratch buffers are for floating-point test purpose; the
+  // implementation could be simplified in conjunction with integer transform
+  float work[64], mid[64];
+  const int in_stride = pitch / 2;
+  const float *vert;
+  const float *horz;
+  int r, c, n;
+
+  // matrices for the vertical and horizontal transforms
+  vert = (tx_type == ADST_ADST || tx_type == ADST_DCT) ? adst_8 : dct_8;
+  horz = (tx_type == ADST_ADST || tx_type == DCT_ADST) ? adst_8 : dct_8;
+
+  // load and convert the residual array into floating-point
+  for (r = 0; r < 8; r++) {
+    const short *src = input + r * in_stride;
+    for (c = 0; c < 8; c++) {
+      work[r * 8 + c] = (float)src[c];
+    }
+  }
+
+  // vertical transformation: mid[r][c] = sum over n of vert[r][n] * work[n][c]
+  for (r = 0; r < 8; r++) {
+    for (c = 0; c < 8; c++) {
+      float *acc = &mid[r * 8 + c];
+      *acc = 0;
+      for (n = 0; n < 8; n++) {
+        *acc += vert[r * 8 + n] * work[n * 8 + c];
+      }
+    }
+  }
+
+  // horizontal transformation:
+  // work[r][c] = sum over n of mid[r][n] * horz[c][n]
+  for (r = 0; r < 8; r++) {
+    for (c = 0; c < 8; c++) {
+      float *acc = &work[r * 8 + c];
+      *acc = 0;
+      for (n = 0; n < 8; n++) {
+        *acc += mid[r * 8 + n] * horz[c * 8 + n];
+      }
+    }
+  }
+
+  // scale by 8, round symmetrically about zero with a 0.49 offset, and
+  // store as short into the dense output block
+  for (n = 0; n < 64; n++) {
+    const float v = work[n];
+    if (v > 0) {
+      output[n] = (short)(8 * v + 0.49);
+    } else {
+      output[n] = -(short)(-8 * v + 0.49);
+    }
+  }
+}
+#endif
+
void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
int i;
int a1, b1, c1, d1;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 325efeb6b..964046d92 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -90,28 +90,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
#if CONFIG_HYBRIDTRANSFORM
if(active_ht) {
b->bmi.as_mode.test = b->bmi.as_mode.first;
- switch(b->bmi.as_mode.first) {
- // case B_DC_PRED :
- case B_TM_PRED :
- case B_RD_PRED :
- b->bmi.as_mode.tx_type = ADST_ADST;
- break;
-
- case B_VE_PRED :
- case B_VR_PRED :
- b->bmi.as_mode.tx_type = ADST_DCT;
- break;
-
- case B_HE_PRED :
- case B_HD_PRED :
- case B_HU_PRED :
- b->bmi.as_mode.tx_type = DCT_ADST;
- break;
-
- default :
- b->bmi.as_mode.tx_type = DCT_DCT;
- break;
- }
+ txfm_map(b, b->bmi.as_mode.first);
vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
vp8_ht_quantize_b(be, b);
@@ -329,16 +308,25 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
}
#endif
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
{
MACROBLOCKD *xd = &x->e_mbd;
int idx = (ib & 0x02) ? (ib + 2) : ib;
// generate residual blocks
vp8_subtract_4b_c(be, b, 16);
- x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+
+ txfm_map(b, pred_mode_conv(b->bmi.as_mode.first));
+
+ vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32,
+ b->bmi.as_mode.tx_type);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
- vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+ vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
+ b->bmi.as_mode.tx_type);
+
+// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+// x->quantize_b_8x8(x->block + idx, xd->block + idx);
+// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
// reconstruct submacroblock
for (i = 0; i < 4; i++) {
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index a2cd2651a..6eb10f4f1 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -454,7 +454,7 @@ int vp8_block_error_c(short *coeff, short *dqcoeff) {
return error;
}
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
int vp8_submb_error_c(short *coeff, short *dqcoeff) {
int i;
int error = 0;
@@ -985,28 +985,7 @@ static int64_t rd_pick_intra4x4block(
#if CONFIG_HYBRIDTRANSFORM
if(active_ht) {
b->bmi.as_mode.test = mode;
- switch(mode) {
- // case B_DC_PRED :
- case B_TM_PRED :
- case B_RD_PRED :
- b->bmi.as_mode.tx_type = ADST_ADST;
- break;
-
- case B_VE_PRED :
- case B_VR_PRED :
- b->bmi.as_mode.tx_type = ADST_DCT;
- break;
-
- case B_HE_PRED :
- case B_HD_PRED :
- case B_HU_PRED :
- b->bmi.as_mode.tx_type = DCT_ADST;
- break;
-
- default :
- b->bmi.as_mode.tx_type = DCT_DCT;
- break;
- }
+ txfm_map(b, mode);
vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
vp8_ht_quantize_b(be, b);
@@ -1267,7 +1246,7 @@ static int64_t rd_pick_intra8x8block(
DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8);
DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4);
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
// perform transformation of dimension 8x8
// note the input and output index mapping
int idx = (ib & 0x02) ? (ib + 2) : ib;
@@ -1298,8 +1277,10 @@ static int64_t rd_pick_intra8x8block(
vp8_subtract_4b_c(be, b, 16);
-#if CONFIG_HTRANS8X8
- x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+#if CONFIG_HYBRIDTRANSFORM8X8
+ txfm_map(b, pred_mode_conv(mode));
+ vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type);
+// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block
@@ -1376,7 +1357,7 @@ static int64_t rd_pick_intra8x8block(
#endif
vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib);
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
*(a + vp8_block2above_8x8[idx]) = besta0;
*(a + vp8_block2above_8x8[idx] + 1) = besta1;
*(l + vp8_block2left_8x8 [idx]) = bestl0;
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index dac18c6db..105aa6a7c 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -504,7 +504,7 @@ static void tokenize1st_order_ht( MACROBLOCKD *xd,
#endif
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
static void tokenize1st_order_chroma
(
MACROBLOCKD *xd,
@@ -886,7 +886,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) {
tokenize1st_order_ht(x, t, plane_type, cpi);
} else {
-#if CONFIG_HTRANS8X8
+#if CONFIG_HYBRIDTRANSFORM8X8
if (x->mode_info_context->mbmi.mode == I8X8_PRED) {
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;