diff options
Diffstat (limited to 'vp9')
32 files changed, 1215 insertions, 1806 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 28671c38c..f495c29f3 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -200,9 +200,6 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { void vp9_create_common(VP9_COMMON *cm) { vp9_machine_specific_config(cm); - - cm->tx_mode = ONLY_4X4; - cm->comp_pred_mode = REFERENCE_MODE_SELECT; } void vp9_remove_common(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 993ee7935..898fc6f1a 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -26,8 +26,9 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_treecoder.h" -#define BLOCK_SIZE_GROUPS 4 +#define BLOCK_SIZE_GROUPS 4 #define MBSKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 /* Segment Feature Masks */ #define MAX_MV_REF_CANDIDATES 2 @@ -199,7 +200,6 @@ struct buf_2d { struct macroblockd_plane { int16_t *dqcoeff; - uint16_t *eobs; PLANE_TYPE plane_type; int subsampling_x; int subsampling_y; diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h deleted file mode 100644 index 3b512beb9..000000000 --- a/vp9/common/vp9_default_coef_probs.h +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. -*/ -#ifndef VP9_COMMON_DEFAULT_COEF_PROBS_H_ -#define VP9_COMMON_DEFAULT_COEF_PROBS_H_ - -/*Generated file, included by vp9_entropy.c*/ -static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 195, 29, 183 }, - { 84, 49, 136 }, - { 8, 42, 71 } - }, { /* Coeff Band 1 */ - { 31, 107, 169 }, - { 35, 99, 159 }, - { 17, 82, 140 }, - { 8, 66, 114 }, - { 2, 44, 76 }, - { 1, 19, 32 } - }, { /* Coeff Band 2 */ - { 40, 132, 201 }, - { 29, 114, 187 }, - { 13, 91, 157 }, - { 7, 75, 127 }, - { 3, 58, 95 }, - { 1, 28, 47 } - }, { /* Coeff Band 3 */ - { 69, 142, 221 }, - { 42, 122, 201 }, - { 15, 91, 159 }, - { 6, 67, 121 }, - { 1, 42, 77 }, - { 1, 17, 31 } - }, { /* Coeff Band 4 */ - { 102, 148, 228 }, - { 67, 117, 204 }, - { 17, 82, 154 }, - { 6, 59, 114 }, - { 2, 39, 75 }, - { 1, 15, 29 } - }, { /* Coeff Band 5 */ - { 156, 57, 233 }, - { 119, 57, 212 }, - { 58, 48, 163 }, - { 29, 40, 124 }, - { 12, 30, 81 }, - { 3, 12, 31 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 191, 107, 226 }, - { 124, 117, 204 }, - { 25, 99, 155 } - }, { /* Coeff Band 1 */ - { 29, 148, 210 }, - { 37, 126, 194 }, - { 8, 93, 157 }, - { 2, 68, 118 }, - { 1, 39, 69 }, - { 1, 17, 33 } - }, { /* Coeff Band 2 */ - { 41, 151, 213 }, - { 27, 123, 193 }, - { 3, 82, 144 }, - { 1, 58, 105 }, - { 1, 32, 60 }, - { 1, 13, 26 } - }, { /* Coeff Band 3 */ - { 59, 159, 220 }, - { 23, 126, 198 }, - { 4, 88, 151 }, - { 1, 66, 114 }, - { 1, 38, 71 }, - { 1, 18, 34 } - }, { /* Coeff Band 4 */ - { 114, 136, 232 }, - { 51, 114, 207 }, - { 11, 83, 155 }, - { 3, 56, 105 }, - { 1, 33, 65 }, - { 1, 17, 34 } - }, { /* Coeff Band 5 */ - { 149, 65, 234 }, - { 121, 57, 215 }, - { 61, 49, 166 }, - { 28, 36, 114 }, - { 12, 25, 76 }, - { 3, 16, 42 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 214, 49, 220 }, - { 132, 63, 188 }, - { 42, 65, 137 } - }, { /* Coeff Band 1 */ - { 85, 137, 221 }, - { 104, 131, 216 }, - { 49, 111, 192 }, - { 21, 87, 155 }, - { 2, 49, 87 }, - { 1, 16, 28 } - }, { /* Coeff Band 2 */ - { 89, 163, 230 }, - { 90, 137, 220 }, - { 29, 100, 183 }, - { 10, 70, 135 }, - { 2, 42, 81 }, - { 1, 17, 33 } - }, { /* Coeff Band 3 */ - { 108, 167, 237 }, - { 55, 133, 222 }, - { 15, 97, 179 }, - { 4, 72, 135 }, - { 1, 45, 85 }, - { 1, 19, 38 } - }, { /* Coeff Band 4 */ - { 124, 146, 240 }, - { 66, 124, 224 }, - { 17, 88, 175 }, - { 4, 58, 122 }, - { 1, 36, 75 }, - { 1, 18, 37 } - }, { /* Coeff Band 5 */ - { 141, 79, 241 }, - { 126, 70, 227 }, - { 66, 58, 182 }, - { 30, 44, 136 }, - { 12, 34, 96 }, - { 2, 20, 47 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 229, 99, 249 }, - { 143, 111, 235 }, - { 46, 109, 192 } - }, { /* Coeff Band 1 */ - { 82, 158, 236 }, - { 94, 146, 224 }, - { 25, 117, 191 }, - { 9, 87, 149 }, - { 3, 56, 99 }, - { 1, 33, 57 } - }, { /* Coeff Band 2 */ - { 83, 167, 237 }, - { 68, 145, 222 }, - { 10, 103, 177 }, - { 2, 72, 131 }, - { 1, 41, 79 }, - { 1, 20, 39 } - }, { /* Coeff Band 3 */ - { 99, 167, 239 }, - { 47, 141, 224 }, - { 10, 104, 178 }, - { 2, 73, 133 }, - { 1, 44, 85 }, - { 1, 22, 47 } - }, { /* Coeff Band 4 */ - { 127, 145, 243 }, - { 71, 129, 228 }, - { 17, 93, 177 }, - { 3, 61, 124 }, - { 1, 41, 84 }, - { 1, 21, 52 } - }, { /* Coeff Band 5 */ - { 157, 78, 244 }, - { 140, 72, 231 }, - { 69, 58, 184 }, - { 31, 44, 137 }, - { 14, 38, 105 }, - { 8, 23, 61 } - } - } - } -}; -static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 125, 34, 187 }, - { 52, 41, 133 }, - { 6, 31, 56 } - }, { /* Coeff Band 1 */ - { 37, 109, 153 }, - { 51, 102, 147 }, - { 23, 87, 128 }, - { 8, 67, 101 }, - { 1, 41, 63 }, - { 1, 19, 29 } - }, { /* Coeff Band 2 */ - { 31, 154, 185 }, - { 17, 127, 175 }, - { 6, 96, 145 }, - { 2, 73, 114 }, - { 1, 51, 82 }, - { 1, 28, 45 } - }, { /* Coeff Band 3 */ - { 23, 163, 200 }, - { 10, 131, 185 }, - { 2, 93, 148 }, - { 1, 67, 111 }, - { 1, 41, 69 }, - { 1, 14, 24 } - }, { /* Coeff Band 4 */ - { 29, 176, 217 }, - { 12, 145, 201 }, - { 3, 101, 156 }, - { 1, 69, 111 }, - { 1, 39, 63 }, - { 1, 14, 23 } - }, { /* Coeff Band 5 */ - { 57, 192, 233 }, - { 25, 154, 215 }, - { 6, 109, 167 }, - { 3, 78, 118 }, - { 1, 48, 69 }, - { 1, 21, 29 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 202, 105, 245 }, - { 108, 106, 216 }, - { 18, 90, 144 } - }, { /* Coeff Band 1 */ - { 33, 172, 219 }, - { 64, 149, 206 }, - { 14, 117, 177 }, - { 5, 90, 141 }, - { 2, 61, 95 }, - { 1, 37, 57 } - }, { /* Coeff Band 2 */ - { 33, 179, 220 }, - { 11, 140, 198 }, - { 1, 89, 148 }, - { 1, 60, 104 }, - { 1, 33, 57 }, - { 1, 12, 21 } - }, { /* Coeff Band 3 */ - { 30, 181, 221 }, - { 8, 141, 198 }, - { 1, 87, 145 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 32, 186, 224 }, - { 7, 142, 198 }, - { 1, 86, 143 }, - { 1, 58, 100 }, - { 1, 31, 55 }, - { 1, 12, 22 } - }, { /* Coeff Band 5 */ - { 57, 192, 227 }, - { 20, 143, 204 }, - { 3, 96, 154 }, - { 1, 68, 112 }, - { 1, 42, 69 }, - { 1, 19, 32 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 212, 35, 215 }, - { 113, 47, 169 }, - { 29, 48, 105 } - }, { /* Coeff Band 1 */ - { 74, 129, 203 }, - { 106, 120, 203 }, - { 49, 107, 178 }, - { 19, 84, 144 }, - { 4, 50, 84 }, - { 1, 15, 25 } - }, { /* Coeff Band 2 */ - { 71, 172, 217 }, - { 44, 141, 209 }, - { 15, 102, 173 }, - { 6, 76, 133 }, - { 2, 51, 89 }, - { 1, 24, 42 } - }, { /* Coeff Band 3 */ - { 64, 185, 231 }, - { 31, 148, 216 }, - { 8, 103, 175 }, - { 3, 74, 131 }, - { 1, 46, 81 }, - { 1, 18, 30 } - }, { /* Coeff Band 4 */ - { 65, 196, 235 }, - { 25, 157, 221 }, - { 5, 105, 174 }, - { 1, 67, 120 }, - { 1, 38, 69 }, - { 1, 15, 30 } - }, { /* Coeff Band 5 */ - { 65, 204, 238 }, - { 30, 156, 224 }, - { 7, 107, 177 }, - { 2, 70, 124 }, - { 1, 42, 73 }, - { 1, 18, 34 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 225, 86, 251 }, - { 144, 104, 235 }, - { 42, 99, 181 } - }, { /* Coeff Band 1 */ - { 85, 175, 239 }, - { 112, 165, 229 }, - { 29, 136, 200 }, - { 12, 103, 162 }, - { 6, 77, 123 }, - { 2, 53, 84 } - }, { /* Coeff Band 2 */ - { 75, 183, 239 }, - { 30, 155, 221 }, - { 3, 106, 171 }, - { 1, 74, 128 }, - { 1, 44, 76 }, - { 1, 17, 28 } - }, { /* Coeff Band 3 */ - { 73, 185, 240 }, - { 27, 159, 222 }, - { 2, 107, 172 }, - { 1, 75, 127 }, - { 1, 42, 73 }, - { 1, 17, 29 } - }, { /* Coeff Band 4 */ - { 62, 190, 238 }, - { 21, 159, 222 }, - { 2, 107, 172 }, - { 1, 72, 122 }, - { 1, 40, 71 }, - { 1, 18, 32 } - }, { /* Coeff Band 5 */ - { 61, 199, 240 }, - { 27, 161, 226 }, - { 4, 113, 180 }, - { 1, 76, 129 }, - { 1, 46, 80 }, - { 1, 23, 41 } - } - } - } -}; -static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 7, 27, 153 }, - { 5, 30, 95 }, - { 1, 16, 30 } - }, { /* Coeff Band 1 */ - { 50, 75, 127 }, - { 57, 75, 124 }, - { 27, 67, 108 }, - { 10, 54, 86 }, - { 1, 33, 52 }, - { 1, 12, 18 } - }, { /* Coeff Band 2 */ - { 43, 125, 151 }, - { 26, 108, 148 }, - { 7, 83, 122 }, - { 2, 59, 89 }, - { 1, 38, 60 }, - { 1, 17, 27 } - }, { /* Coeff Band 3 */ - { 23, 144, 163 }, - { 13, 112, 154 }, - { 2, 75, 117 }, - { 1, 50, 81 }, - { 1, 31, 51 }, - { 1, 14, 23 } - }, { /* Coeff Band 4 */ - { 18, 162, 185 }, - { 6, 123, 171 }, - { 1, 78, 125 }, - { 1, 51, 86 }, - { 1, 31, 54 }, - { 1, 14, 23 } - }, { /* Coeff Band 5 */ - { 15, 199, 227 }, - { 3, 150, 204 }, - { 1, 91, 146 }, - { 1, 55, 95 }, - { 1, 30, 53 }, - { 1, 11, 20 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 19, 55, 240 }, - { 19, 59, 196 }, - { 3, 52, 105 } - }, { /* Coeff Band 1 */ - { 41, 166, 207 }, - { 104, 153, 199 }, - { 31, 123, 181 }, - { 14, 101, 152 }, - { 5, 72, 106 }, - { 1, 36, 52 } - }, { /* Coeff Band 2 */ - { 35, 176, 211 }, - { 12, 131, 190 }, - { 2, 88, 144 }, - { 1, 60, 101 }, - { 1, 36, 60 }, - { 1, 16, 28 } - }, { /* Coeff Band 3 */ - { 28, 183, 213 }, - { 8, 134, 191 }, - { 1, 86, 142 }, - { 1, 56, 96 }, - { 1, 30, 53 }, - { 1, 12, 20 } - }, { /* Coeff Band 4 */ - { 20, 190, 215 }, - { 4, 135, 192 }, - { 1, 84, 139 }, - { 1, 53, 91 }, - { 1, 28, 49 }, - { 1, 11, 20 } - }, { /* Coeff Band 5 */ - { 13, 196, 216 }, - { 2, 137, 192 }, - { 1, 86, 143 }, - { 1, 57, 99 }, - { 1, 32, 56 }, - { 1, 13, 24 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 211, 29, 217 }, - { 96, 47, 156 }, - { 22, 43, 87 } - }, { /* Coeff Band 1 */ - { 78, 120, 193 }, - { 111, 116, 186 }, - { 46, 102, 164 }, - { 15, 80, 128 }, - { 2, 49, 76 }, - { 1, 18, 28 } - }, { /* Coeff Band 2 */ - { 71, 161, 203 }, - { 42, 132, 192 }, - { 10, 98, 150 }, - { 3, 69, 109 }, - { 1, 44, 70 }, - { 1, 18, 29 } - }, { /* Coeff Band 3 */ - { 57, 186, 211 }, - { 30, 140, 196 }, - { 4, 93, 146 }, - { 1, 62, 102 }, - { 1, 38, 65 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 47, 199, 217 }, - { 14, 145, 196 }, - { 1, 88, 142 }, - { 1, 57, 98 }, - { 1, 36, 62 }, - { 1, 15, 26 } - }, { /* Coeff Band 5 */ - { 26, 219, 229 }, - { 5, 155, 207 }, - { 1, 94, 151 }, - { 1, 60, 104 }, - { 1, 36, 62 }, - { 1, 16, 28 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 233, 29, 248 }, - { 146, 47, 220 }, - { 43, 52, 140 } - }, { /* Coeff Band 1 */ - { 100, 163, 232 }, - { 179, 161, 222 }, - { 63, 142, 204 }, - { 37, 113, 174 }, - { 26, 89, 137 }, - { 18, 68, 97 } - }, { /* Coeff Band 2 */ - { 85, 181, 230 }, - { 32, 146, 209 }, - { 7, 100, 164 }, - { 3, 71, 121 }, - { 1, 45, 77 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 65, 187, 230 }, - { 20, 148, 207 }, - { 2, 97, 159 }, - { 1, 68, 116 }, - { 1, 40, 70 }, - { 1, 14, 29 } - }, { /* Coeff Band 4 */ - { 40, 194, 227 }, - { 8, 147, 204 }, - { 1, 94, 155 }, - { 1, 65, 112 }, - { 1, 39, 66 }, - { 1, 14, 26 } - }, { /* Coeff Band 5 */ - { 16, 208, 228 }, - { 3, 151, 207 }, - { 1, 98, 160 }, - { 1, 67, 117 }, - { 1, 41, 74 }, - { 1, 17, 31 } - } - } - } -}; -static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = { - { /* block Type 0 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 17, 38, 140 }, - { 7, 34, 80 }, - { 1, 17, 29 } - }, { /* Coeff Band 1 */ - { 37, 75, 128 }, - { 41, 76, 128 }, - { 26, 66, 116 }, - { 12, 52, 94 }, - { 2, 32, 55 }, - { 1, 10, 16 } - }, { /* Coeff Band 2 */ - { 50, 127, 154 }, - { 37, 109, 152 }, - { 16, 82, 121 }, - { 5, 59, 85 }, - { 1, 35, 54 }, - { 1, 13, 20 } - }, { /* Coeff Band 3 */ - { 40, 142, 167 }, - { 17, 110, 157 }, - { 2, 71, 112 }, - { 1, 44, 72 }, - { 1, 27, 45 }, - { 1, 11, 17 } - }, { /* Coeff Band 4 */ - { 30, 175, 188 }, - { 9, 124, 169 }, - { 1, 74, 116 }, - { 1, 48, 78 }, - { 1, 30, 49 }, - { 1, 11, 18 } - }, { /* Coeff Band 5 */ - { 10, 222, 223 }, - { 2, 150, 194 }, - { 1, 83, 128 }, - { 1, 48, 79 }, - { 1, 27, 45 }, - { 1, 11, 17 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 36, 41, 235 }, - { 29, 36, 193 }, - { 10, 27, 111 } - }, { /* Coeff Band 1 */ - { 85, 165, 222 }, - { 177, 162, 215 }, - { 110, 135, 195 }, - { 57, 113, 168 }, - { 23, 83, 120 }, - { 10, 49, 61 } - }, { /* Coeff Band 2 */ - { 85, 190, 223 }, - { 36, 139, 200 }, - { 5, 90, 146 }, - { 1, 60, 103 }, - { 1, 38, 65 }, - { 1, 18, 30 } - }, { /* Coeff Band 3 */ - { 72, 202, 223 }, - { 23, 141, 199 }, - { 2, 86, 140 }, - { 1, 56, 97 }, - { 1, 36, 61 }, - { 1, 16, 27 } - }, { /* Coeff Band 4 */ - { 55, 218, 225 }, - { 13, 145, 200 }, - { 1, 86, 141 }, - { 1, 57, 99 }, - { 1, 35, 61 }, - { 1, 13, 22 } - }, { /* Coeff Band 5 */ - { 15, 235, 212 }, - { 1, 132, 184 }, - { 1, 84, 139 }, - { 1, 57, 97 }, - { 1, 34, 56 }, - { 1, 14, 23 } - } - } - }, { /* block Type 1 */ - { /* Intra */ - { /* Coeff Band 0 */ - { 181, 21, 201 }, - { 61, 37, 123 }, - { 10, 38, 71 } - }, { /* Coeff Band 1 */ - { 47, 106, 172 }, - { 95, 104, 173 }, - { 42, 93, 159 }, - { 18, 77, 131 }, - { 4, 50, 81 }, - { 1, 17, 23 } - }, { /* Coeff Band 2 */ - { 62, 147, 199 }, - { 44, 130, 189 }, - { 28, 102, 154 }, - { 18, 75, 115 }, - { 2, 44, 65 }, - { 1, 12, 19 } - }, { /* Coeff Band 3 */ - { 55, 153, 210 }, - { 24, 130, 194 }, - { 3, 93, 146 }, - { 1, 61, 97 }, - { 1, 31, 50 }, - { 1, 10, 16 } - }, { /* Coeff Band 4 */ - { 49, 186, 223 }, - { 17, 148, 204 }, - { 1, 96, 142 }, - { 1, 53, 83 }, - { 1, 26, 44 }, - { 1, 11, 17 } - }, { /* Coeff Band 5 */ - { 13, 217, 212 }, - { 2, 136, 180 }, - { 1, 78, 124 }, - { 1, 50, 83 }, - { 1, 29, 49 }, - { 1, 14, 23 } - } - }, { /* Inter */ - { /* Coeff Band 0 */ - { 197, 13, 247 }, - { 82, 17, 222 }, - { 25, 17, 162 } - }, { /* Coeff Band 1 */ - { 126, 186, 247 }, - { 234, 191, 243 }, - { 176, 177, 234 }, - { 104, 158, 220 }, - { 66, 128, 186 }, - { 55, 90, 137 } - }, { /* Coeff Band 2 */ - { 111, 197, 242 }, - { 46, 158, 219 }, - { 9, 104, 171 }, - { 2, 65, 125 }, - { 1, 44, 80 }, - { 1, 17, 91 } - }, { /* Coeff Band 3 */ - { 104, 208, 245 }, - { 39, 168, 224 }, - { 3, 109, 162 }, - { 1, 79, 124 }, - { 1, 50, 102 }, - { 1, 43, 102 } - }, { /* Coeff Band 4 */ - { 84, 220, 246 }, - { 31, 177, 231 }, - { 2, 115, 180 }, - { 1, 79, 134 }, - { 1, 55, 77 }, - { 1, 60, 79 } - }, { /* Coeff Band 5 */ - { 43, 243, 240 }, - { 8, 180, 217 }, - { 1, 115, 166 }, - { 1, 84, 121 }, - { 1, 51, 67 }, - { 1, 16, 6 } - } - } - } -}; - -#endif // VP9_COMMON_DEFAULT_COEF_PROBS_H_ diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index b35c43fcd..285a9c262 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -35,8 +35,7 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -DECLARE_ALIGNED(16, const uint8_t, - vp9_coefband_trans_8x8plus[1024]) = { +DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]) = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, // beyond MAXBAND_INDEX+1 all values are filled as 5 @@ -109,13 +108,13 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]) = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, }; -DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = { +DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]) = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 }; -const vp9_tree_index vp9_coefmodel_tree[6] = { - -DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ +const vp9_tree_index vp9_coefmodel_tree[TREE_SIZE(UNCONSTRAINED_NODES + 1)] = { + -EOB_MODEL_TOKEN, 2, + -ZERO_TOKEN, 4, -ONE_TOKEN, -TWO_TOKEN, }; @@ -131,265 +130,601 @@ const vp9_tree_index vp9_coefmodel_tree[6] = { // Every odd line in this table can be generated from the even lines // by averaging : -// vp9_pareto8_full[l][node] = ( vp9_pareto8_full[l-1][node] + -// vp9_pareto8_full[l+1][node] ) >> 1; -const vp9_prob vp9_pareto8_full[256][MODEL_NODES] = { - { 3, 86, 128, 6, 86, 23, 88, 29}, - { 6, 86, 128, 11, 87, 42, 91, 52}, - { 9, 86, 129, 17, 88, 61, 94, 76}, - { 12, 86, 129, 22, 88, 77, 97, 93}, - { 15, 87, 129, 28, 89, 93, 100, 110}, - { 17, 87, 129, 33, 90, 105, 103, 123}, - { 20, 88, 130, 38, 91, 118, 106, 136}, - { 23, 88, 130, 43, 91, 128, 108, 146}, - { 26, 89, 131, 48, 92, 139, 111, 156}, - { 28, 89, 131, 53, 93, 147, 114, 163}, - { 31, 90, 131, 58, 94, 156, 117, 171}, - { 34, 90, 131, 62, 94, 163, 119, 177}, - { 37, 90, 132, 66, 95, 171, 122, 184}, - { 39, 90, 132, 70, 96, 177, 124, 189}, - { 42, 91, 132, 75, 97, 183, 127, 194}, - { 44, 91, 132, 79, 97, 188, 129, 198}, - { 47, 92, 133, 83, 98, 193, 132, 202}, - { 49, 92, 133, 86, 99, 197, 134, 205}, - { 52, 93, 133, 90, 100, 201, 137, 208}, - { 54, 93, 133, 94, 100, 204, 139, 211}, - { 57, 94, 134, 98, 101, 208, 142, 214}, - { 59, 94, 134, 101, 102, 211, 144, 216}, - { 62, 94, 135, 105, 103, 214, 146, 218}, - { 64, 94, 135, 108, 103, 216, 148, 220}, - { 66, 95, 135, 111, 104, 219, 151, 222}, - { 68, 95, 135, 114, 105, 221, 153, 223}, - { 71, 96, 136, 117, 106, 224, 155, 225}, - { 73, 96, 136, 120, 106, 225, 157, 226}, - { 76, 97, 136, 123, 107, 227, 159, 228}, - { 78, 97, 136, 126, 108, 229, 160, 229}, - { 80, 98, 137, 129, 109, 231, 162, 231}, - { 82, 98, 137, 131, 109, 232, 164, 232}, - { 84, 98, 138, 134, 110, 234, 166, 233}, - { 86, 98, 138, 137, 111, 235, 168, 234}, - { 89, 99, 138, 140, 112, 236, 170, 235}, - { 91, 99, 138, 142, 112, 237, 171, 235}, - { 93, 100, 139, 145, 113, 238, 173, 236}, - { 95, 100, 139, 147, 114, 239, 174, 237}, - { 97, 101, 140, 149, 115, 240, 176, 238}, - { 99, 101, 140, 151, 115, 241, 177, 238}, - {101, 102, 140, 154, 116, 242, 179, 239}, - {103, 102, 140, 156, 117, 242, 180, 239}, - {105, 103, 141, 158, 118, 243, 182, 240}, - {107, 103, 141, 160, 118, 243, 183, 240}, - {109, 104, 141, 162, 119, 244, 185, 241}, - {111, 104, 141, 164, 119, 244, 186, 241}, - {113, 104, 142, 166, 120, 245, 187, 242}, - {114, 104, 142, 168, 121, 245, 188, 242}, - {116, 105, 143, 170, 122, 246, 190, 243}, - {118, 105, 143, 171, 122, 246, 191, 243}, - {120, 106, 143, 173, 123, 247, 192, 244}, - {121, 106, 143, 175, 124, 247, 193, 244}, - {123, 107, 144, 177, 125, 248, 195, 244}, - {125, 107, 144, 178, 125, 248, 196, 244}, - {127, 108, 145, 180, 126, 249, 197, 245}, - {128, 108, 145, 181, 127, 249, 198, 245}, - {130, 109, 145, 183, 128, 249, 199, 245}, - {132, 109, 145, 184, 128, 249, 200, 245}, - {134, 110, 146, 186, 129, 250, 201, 246}, - {135, 110, 146, 187, 130, 250, 202, 246}, - {137, 111, 147, 189, 131, 251, 203, 246}, - {138, 111, 147, 190, 131, 251, 204, 246}, - {140, 112, 147, 192, 132, 251, 205, 247}, - {141, 112, 147, 193, 132, 251, 206, 247}, - {143, 113, 148, 194, 133, 251, 207, 247}, - {144, 113, 148, 195, 134, 251, 207, 247}, - {146, 114, 149, 197, 135, 252, 208, 248}, - {147, 114, 149, 198, 135, 252, 209, 248}, - {149, 115, 149, 199, 136, 252, 210, 248}, - {150, 115, 149, 200, 137, 252, 210, 248}, - {152, 115, 150, 201, 138, 252, 211, 248}, - {153, 115, 150, 202, 138, 252, 212, 248}, - {155, 116, 151, 204, 139, 253, 213, 249}, - {156, 116, 151, 205, 139, 253, 213, 249}, - {158, 117, 151, 206, 140, 253, 214, 249}, - {159, 117, 151, 207, 141, 253, 215, 249}, - {161, 118, 152, 208, 142, 253, 216, 249}, - {162, 118, 152, 209, 142, 253, 216, 249}, - {163, 119, 153, 210, 143, 253, 217, 249}, - {164, 119, 153, 211, 143, 253, 217, 249}, - {166, 120, 153, 212, 144, 254, 218, 250}, - {167, 120, 153, 212, 145, 254, 219, 250}, - {168, 121, 154, 213, 146, 254, 220, 250}, - {169, 121, 154, 214, 146, 254, 220, 250}, - {171, 122, 155, 215, 147, 254, 221, 250}, - {172, 122, 155, 216, 147, 254, 221, 250}, - {173, 123, 155, 217, 148, 254, 222, 250}, - {174, 123, 155, 217, 149, 254, 222, 250}, - {176, 124, 156, 218, 150, 254, 223, 250}, - {177, 124, 156, 219, 150, 254, 223, 250}, - {178, 125, 157, 220, 151, 254, 224, 251}, - {179, 125, 157, 220, 151, 254, 224, 251}, - {180, 126, 157, 221, 152, 254, 225, 251}, - {181, 126, 157, 221, 152, 254, 225, 251}, - {183, 127, 158, 222, 153, 254, 226, 251}, - {184, 127, 158, 223, 154, 254, 226, 251}, - {185, 128, 159, 224, 155, 255, 227, 251}, - {186, 128, 159, 224, 155, 255, 227, 251}, - {187, 129, 160, 225, 156, 255, 228, 251}, - {188, 130, 160, 225, 156, 255, 228, 251}, - {189, 131, 160, 226, 157, 255, 228, 251}, - {190, 131, 160, 226, 158, 255, 228, 251}, - {191, 132, 161, 227, 159, 255, 229, 251}, - {192, 132, 161, 227, 159, 255, 229, 251}, - {193, 133, 162, 228, 160, 255, 230, 252}, - {194, 133, 162, 229, 160, 255, 230, 252}, - {195, 134, 163, 230, 161, 255, 231, 252}, - {196, 134, 163, 230, 161, 255, 231, 252}, - {197, 135, 163, 231, 162, 255, 231, 252}, - {198, 135, 163, 231, 162, 255, 231, 252}, - {199, 136, 164, 232, 163, 255, 232, 252}, - {200, 136, 164, 232, 164, 255, 232, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {202, 138, 166, 233, 166, 255, 233, 252}, - {203, 138, 166, 233, 166, 255, 233, 252}, - {204, 139, 166, 234, 167, 255, 234, 252}, - {205, 139, 166, 234, 167, 255, 234, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {207, 141, 168, 236, 169, 255, 235, 252}, - {208, 141, 168, 236, 170, 255, 235, 252}, - {209, 142, 169, 237, 171, 255, 236, 252}, - {209, 143, 169, 237, 171, 255, 236, 252}, - {210, 144, 169, 237, 172, 255, 236, 252}, - {211, 144, 169, 237, 172, 255, 236, 252}, - {212, 145, 170, 238, 173, 255, 237, 252}, - {213, 145, 170, 238, 173, 255, 237, 252}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {216, 148, 173, 240, 176, 255, 238, 253}, - {217, 148, 173, 240, 176, 255, 238, 253}, - {218, 149, 173, 241, 177, 255, 239, 253}, - {218, 149, 173, 241, 178, 255, 239, 253}, - {219, 150, 174, 241, 179, 255, 239, 253}, - {219, 151, 174, 241, 179, 255, 239, 253}, - {220, 152, 175, 242, 180, 255, 240, 253}, - {221, 152, 175, 242, 180, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {225, 156, 178, 244, 184, 255, 241, 253}, - {225, 157, 178, 244, 184, 255, 241, 253}, - {226, 158, 179, 244, 185, 255, 242, 253}, - {227, 158, 179, 244, 185, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {230, 161, 182, 246, 188, 255, 243, 253}, - {230, 162, 182, 246, 188, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {234, 166, 185, 247, 192, 255, 244, 253}, - {234, 167, 185, 247, 192, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {237, 171, 189, 249, 196, 255, 245, 254}, - {237, 172, 189, 249, 196, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {240, 175, 192, 249, 199, 255, 246, 254}, - {240, 176, 192, 249, 199, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {242, 179, 195, 250, 202, 255, 246, 254}, - {242, 180, 195, 250, 202, 255, 246, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {243, 182, 197, 251, 204, 255, 247, 254}, - {243, 183, 197, 251, 204, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {245, 186, 200, 251, 207, 255, 247, 254}, - {245, 187, 200, 251, 207, 255, 247, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 189, 202, 252, 208, 255, 248, 254}, - {246, 190, 202, 252, 208, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 192, 204, 252, 210, 255, 248, 254}, - {247, 193, 204, 252, 210, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 195, 206, 252, 212, 255, 249, 254}, - {248, 196, 206, 252, 212, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 198, 208, 253, 214, 255, 249, 254}, - {249, 199, 209, 253, 214, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 201, 211, 253, 215, 255, 249, 254}, - {250, 202, 211, 253, 215, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {251, 204, 213, 253, 217, 255, 250, 254}, - {251, 205, 213, 253, 217, 255, 250, 254}, - {251, 206, 214, 254, 218, 255, 250, 254}, - {251, 206, 215, 254, 218, 255, 250, 254}, - {252, 207, 216, 254, 219, 255, 250, 254}, - {252, 208, 216, 254, 219, 255, 250, 254}, - {252, 209, 217, 254, 220, 255, 250, 254}, - {252, 210, 217, 254, 220, 255, 250, 254}, - {252, 211, 218, 254, 221, 255, 250, 254}, - {252, 212, 218, 254, 221, 255, 250, 254}, - {253, 213, 219, 254, 222, 255, 250, 254}, - {253, 213, 220, 254, 222, 255, 250, 254}, - {253, 214, 221, 254, 223, 255, 250, 254}, - {253, 215, 221, 254, 223, 255, 250, 254}, - {253, 216, 222, 254, 224, 255, 251, 254}, - {253, 217, 223, 254, 224, 255, 251, 254}, - {253, 218, 224, 254, 225, 255, 251, 254}, - {253, 219, 224, 254, 225, 255, 251, 254}, - {254, 220, 225, 254, 225, 255, 251, 254}, - {254, 221, 226, 254, 225, 255, 251, 254}, - {254, 222, 227, 255, 226, 255, 251, 254}, - {254, 223, 227, 255, 226, 255, 251, 254}, - {254, 224, 228, 255, 227, 255, 251, 254}, - {254, 225, 229, 255, 227, 255, 251, 254}, - {254, 226, 230, 255, 228, 255, 251, 254}, - {254, 227, 230, 255, 229, 255, 251, 254}, - {255, 228, 231, 255, 230, 255, 251, 254}, - {255, 229, 232, 255, 230, 255, 251, 254}, - {255, 230, 233, 255, 231, 255, 252, 254}, - {255, 231, 234, 255, 231, 255, 252, 254}, - {255, 232, 235, 255, 232, 255, 252, 254}, - {255, 233, 236, 255, 232, 255, 252, 254}, - {255, 235, 237, 255, 233, 255, 252, 254}, - {255, 236, 238, 255, 234, 255, 252, 254}, - {255, 238, 240, 255, 235, 255, 252, 255}, - {255, 239, 241, 255, 235, 255, 252, 254}, - {255, 241, 243, 255, 236, 255, 252, 254}, - {255, 243, 245, 255, 237, 255, 252, 254}, - {255, 246, 247, 255, 239, 255, 253, 255}, - {255, 246, 247, 255, 239, 255, 253, 255}, +// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + +// vp9_pareto8_full[l+1][node] ) >> 1; +const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { + { 3, 86, 128, 6, 86, 23, 88, 29}, + { 6, 86, 128, 11, 87, 42, 91, 52}, + { 9, 86, 129, 17, 88, 61, 94, 76}, + { 12, 86, 129, 22, 88, 77, 97, 93}, + { 15, 87, 129, 28, 89, 93, 100, 110}, + { 17, 87, 129, 33, 90, 105, 103, 123}, + { 20, 88, 130, 38, 91, 118, 106, 136}, + { 23, 88, 130, 43, 91, 128, 108, 146}, + { 26, 89, 131, 48, 92, 139, 111, 156}, + { 28, 89, 131, 53, 93, 147, 114, 163}, + { 31, 90, 131, 58, 94, 156, 117, 171}, + { 34, 90, 131, 62, 94, 163, 119, 177}, + { 37, 90, 132, 66, 95, 171, 122, 184}, + { 39, 90, 132, 70, 96, 177, 124, 189}, + { 42, 91, 132, 75, 97, 183, 127, 194}, + { 44, 91, 132, 79, 97, 188, 129, 198}, + { 47, 92, 133, 83, 98, 193, 132, 202}, + { 49, 92, 133, 86, 99, 197, 134, 205}, + { 52, 93, 133, 90, 100, 201, 137, 208}, + { 54, 93, 133, 94, 100, 204, 139, 211}, + { 57, 94, 134, 98, 101, 208, 142, 214}, + { 59, 94, 134, 101, 102, 211, 144, 216}, + { 62, 94, 135, 105, 103, 214, 146, 218}, + { 64, 94, 135, 108, 103, 216, 148, 220}, + { 66, 95, 135, 111, 104, 219, 151, 222}, + { 68, 95, 135, 114, 105, 221, 153, 223}, + { 71, 96, 136, 117, 106, 224, 155, 225}, + { 73, 96, 136, 120, 106, 225, 157, 226}, + { 76, 97, 136, 123, 107, 227, 159, 228}, + { 78, 97, 136, 126, 108, 229, 160, 229}, + { 80, 98, 137, 129, 109, 231, 162, 231}, + { 82, 98, 137, 131, 109, 232, 164, 232}, + { 84, 98, 138, 134, 110, 234, 166, 233}, + { 86, 98, 138, 137, 111, 235, 168, 234}, + { 89, 99, 138, 140, 112, 236, 170, 235}, + { 91, 99, 138, 142, 112, 237, 171, 235}, + { 93, 100, 139, 145, 113, 238, 173, 236}, + { 95, 100, 139, 147, 114, 239, 174, 237}, + { 97, 101, 140, 149, 115, 240, 176, 238}, + { 99, 101, 140, 151, 115, 241, 177, 238}, + {101, 102, 140, 154, 116, 242, 179, 239}, + {103, 102, 140, 156, 117, 242, 180, 239}, + {105, 103, 141, 158, 118, 243, 182, 240}, + {107, 103, 141, 160, 118, 243, 183, 240}, + {109, 104, 141, 162, 119, 244, 185, 241}, + {111, 104, 141, 164, 119, 244, 186, 241}, + {113, 104, 142, 166, 120, 245, 187, 242}, + {114, 104, 142, 168, 121, 245, 188, 242}, + {116, 105, 143, 170, 122, 246, 190, 243}, + {118, 105, 143, 171, 122, 246, 191, 243}, + {120, 106, 143, 173, 123, 247, 192, 244}, + {121, 106, 143, 175, 124, 247, 193, 244}, + {123, 107, 144, 177, 125, 248, 195, 244}, + {125, 107, 144, 178, 125, 248, 196, 244}, + {127, 108, 145, 180, 126, 249, 197, 245}, + {128, 108, 145, 181, 127, 249, 198, 245}, + {130, 109, 145, 183, 128, 249, 199, 245}, + {132, 109, 145, 184, 128, 249, 200, 245}, + {134, 110, 146, 186, 129, 250, 201, 246}, + {135, 110, 146, 187, 130, 250, 202, 246}, + {137, 111, 147, 189, 131, 251, 203, 246}, + {138, 111, 147, 190, 131, 251, 204, 246}, + {140, 112, 147, 192, 132, 251, 205, 247}, + {141, 112, 147, 193, 132, 251, 206, 247}, + {143, 113, 148, 194, 133, 251, 207, 247}, + {144, 113, 148, 195, 134, 251, 207, 247}, + {146, 114, 149, 197, 135, 252, 208, 248}, + {147, 114, 149, 198, 135, 252, 209, 248}, + {149, 115, 149, 199, 136, 252, 210, 248}, + {150, 115, 149, 200, 137, 252, 210, 248}, + {152, 115, 150, 201, 138, 252, 211, 248}, + {153, 115, 150, 202, 138, 252, 212, 248}, + {155, 116, 151, 204, 139, 253, 213, 249}, + {156, 116, 151, 205, 139, 253, 213, 249}, + {158, 117, 151, 206, 140, 253, 214, 249}, + {159, 117, 151, 207, 141, 253, 215, 249}, + {161, 118, 152, 208, 142, 253, 216, 249}, + {162, 118, 152, 209, 142, 253, 216, 249}, + {163, 119, 153, 210, 143, 253, 217, 249}, + {164, 119, 153, 211, 143, 253, 217, 249}, + {166, 120, 153, 212, 144, 254, 218, 250}, + {167, 120, 153, 212, 145, 254, 219, 250}, + {168, 121, 154, 213, 146, 254, 220, 250}, + {169, 121, 154, 214, 146, 254, 220, 250}, + {171, 122, 155, 215, 147, 254, 221, 250}, + {172, 122, 155, 216, 147, 254, 221, 250}, + {173, 123, 155, 217, 148, 254, 222, 250}, + {174, 123, 155, 217, 149, 254, 222, 250}, + {176, 124, 156, 218, 150, 254, 223, 250}, + {177, 124, 156, 219, 150, 254, 223, 250}, + {178, 125, 157, 220, 151, 254, 224, 251}, + {179, 125, 157, 220, 151, 254, 224, 251}, + {180, 126, 157, 221, 152, 254, 225, 251}, + {181, 126, 157, 221, 152, 254, 225, 251}, + {183, 127, 158, 222, 153, 254, 226, 251}, + {184, 127, 158, 223, 154, 254, 226, 251}, + {185, 128, 159, 224, 155, 255, 227, 251}, + {186, 128, 159, 224, 155, 255, 227, 251}, + {187, 129, 160, 225, 156, 255, 228, 251}, + {188, 130, 160, 225, 156, 255, 228, 251}, + {189, 131, 160, 226, 157, 255, 228, 251}, + {190, 131, 160, 226, 158, 255, 228, 251}, + {191, 132, 161, 227, 159, 255, 229, 251}, + {192, 132, 161, 227, 159, 255, 229, 251}, + {193, 133, 162, 228, 160, 255, 230, 252}, + {194, 133, 162, 229, 160, 255, 230, 252}, + {195, 134, 163, 230, 161, 255, 231, 252}, + {196, 134, 163, 230, 161, 255, 231, 252}, + {197, 135, 163, 231, 162, 255, 231, 252}, + {198, 135, 163, 231, 162, 255, 231, 252}, + {199, 136, 164, 232, 163, 255, 232, 252}, + {200, 136, 164, 232, 164, 255, 232, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {202, 138, 166, 233, 166, 255, 233, 252}, + {203, 138, 166, 233, 166, 255, 233, 252}, + {204, 139, 166, 234, 167, 255, 234, 252}, + {205, 139, 166, 234, 167, 255, 234, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {207, 141, 168, 236, 169, 255, 235, 252}, + {208, 141, 168, 236, 170, 255, 235, 252}, + {209, 142, 169, 237, 171, 255, 236, 252}, + {209, 143, 169, 237, 171, 255, 236, 252}, + {210, 144, 169, 237, 172, 255, 236, 252}, + {211, 144, 169, 237, 172, 255, 236, 252}, + {212, 145, 170, 238, 173, 255, 237, 252}, + {213, 145, 170, 238, 173, 255, 237, 252}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {216, 148, 173, 240, 176, 255, 238, 253}, + {217, 148, 173, 240, 176, 255, 238, 253}, + {218, 149, 173, 241, 177, 255, 239, 253}, + {218, 149, 173, 241, 178, 255, 239, 253}, + {219, 150, 174, 241, 179, 255, 239, 253}, + {219, 151, 174, 241, 179, 255, 239, 253}, + {220, 152, 175, 242, 180, 255, 240, 253}, + {221, 152, 175, 242, 180, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {225, 156, 178, 244, 184, 255, 241, 253}, + {225, 157, 178, 244, 184, 255, 241, 253}, + {226, 158, 179, 244, 185, 255, 242, 253}, + {227, 158, 179, 244, 185, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {230, 161, 182, 246, 188, 255, 243, 253}, + {230, 162, 182, 246, 188, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {234, 166, 185, 247, 192, 255, 244, 253}, + {234, 167, 185, 247, 192, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {237, 171, 189, 249, 196, 255, 245, 254}, + {237, 172, 189, 249, 196, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {240, 175, 192, 249, 199, 255, 246, 254}, + {240, 176, 192, 249, 199, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {242, 179, 195, 250, 202, 255, 246, 254}, + {242, 180, 195, 250, 202, 255, 246, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {243, 182, 197, 251, 204, 255, 247, 254}, + {243, 183, 197, 251, 204, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {245, 186, 200, 251, 207, 255, 247, 254}, + {245, 187, 200, 251, 207, 255, 247, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 189, 202, 252, 208, 255, 248, 254}, + {246, 190, 202, 252, 208, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 192, 204, 252, 210, 255, 248, 254}, + {247, 193, 204, 252, 210, 255, 248, 254}, + {248, 194, 205, 252, 211, 255, 248, 254}, + {248, 194, 205, 252, 211, 255, 248, 254}, + {248, 195, 206, 252, 212, 255, 249, 254}, + {248, 196, 206, 252, 212, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 198, 208, 253, 214, 255, 249, 254}, + {249, 199, 209, 253, 214, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 201, 211, 253, 215, 255, 249, 254}, + {250, 202, 211, 253, 215, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {251, 204, 213, 253, 217, 255, 250, 254}, + {251, 205, 213, 253, 217, 255, 250, 254}, + {251, 206, 214, 254, 218, 255, 250, 254}, + {251, 206, 215, 254, 218, 255, 250, 254}, + {252, 207, 216, 254, 219, 255, 250, 254}, + {252, 208, 216, 254, 219, 255, 250, 254}, + {252, 209, 217, 254, 220, 255, 250, 254}, + {252, 210, 217, 254, 220, 255, 250, 254}, + {252, 211, 218, 254, 221, 255, 250, 254}, + {252, 212, 218, 254, 221, 255, 250, 254}, + {253, 213, 219, 254, 222, 255, 250, 254}, + {253, 213, 220, 254, 222, 255, 250, 254}, + {253, 214, 221, 254, 223, 255, 250, 254}, + {253, 215, 221, 254, 223, 255, 250, 254}, + {253, 216, 222, 254, 224, 255, 251, 254}, + {253, 217, 223, 254, 224, 255, 251, 254}, + {253, 218, 224, 254, 225, 255, 251, 254}, + {253, 219, 224, 254, 225, 255, 251, 254}, + {254, 220, 225, 254, 225, 255, 251, 254}, + {254, 221, 226, 254, 225, 255, 251, 254}, + {254, 222, 227, 255, 226, 255, 251, 254}, + {254, 223, 227, 255, 226, 255, 251, 254}, + {254, 224, 228, 255, 227, 255, 251, 254}, + {254, 225, 229, 255, 227, 255, 251, 254}, + {254, 226, 230, 255, 228, 255, 251, 254}, + {254, 227, 230, 255, 229, 255, 251, 254}, + {255, 228, 231, 255, 230, 255, 251, 254}, + {255, 229, 232, 255, 230, 255, 251, 254}, + {255, 230, 233, 255, 231, 255, 252, 254}, + {255, 231, 234, 255, 231, 255, 252, 254}, + {255, 232, 235, 255, 232, 255, 252, 254}, + {255, 233, 236, 255, 232, 255, 252, 254}, + {255, 235, 237, 255, 233, 255, 252, 254}, + {255, 236, 238, 255, 234, 255, 252, 254}, + {255, 238, 240, 255, 235, 255, 252, 255}, + {255, 239, 241, 255, 235, 255, 252, 254}, + {255, 241, 243, 255, 236, 255, 252, 254}, + {255, 243, 245, 255, 237, 255, 252, 254}, + {255, 246, 247, 255, 239, 255, 253, 255}, + {255, 246, 247, 255, 239, 255, 253, 255}, +}; + +static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } + }, { // Band 1 + { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, + { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } + }, { // Band 2 + { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, + { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } + }, { // Band 3 + { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, + { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } + }, { // Band 4 + { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 }, + { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } + }, { // Band 5 + { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, + { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } + } + }, { // Inter + { // Band 0 + { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } + }, { // Band 1 + { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, + { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } + }, { // Band 2 + { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, + { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } + }, { // Band 3 + { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, + { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } + }, { // Band 4 + { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 }, + { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } + }, { // Band 5 + { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, + { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } + }, { // Band 1 + { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, + { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } + }, { // Band 2 + { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, + { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } + }, { // Band 3 + { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, + { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } + }, { // Band 4 + { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, + { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } + }, { // Band 5 + { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, + { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } + } + }, { // Inter + { // Band 0 + { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } + }, { // Band 1 + { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, + { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } + }, { // Band 2 + { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, + { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } + }, { // Band 3 + { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, + { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 } + }, { // Band 4 + { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, + { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } + }, { // Band 5 + { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, + { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 } + }, { // Band 1 + { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, + { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } + }, { // Band 2 + { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, + { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } + }, { // Band 3 + { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, + { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } + }, { // Band 4 + { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, + { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } + }, { // Band 5 + { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, + { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } + } + }, { // Inter + { // Band 0 + { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } + }, { // Band 1 + { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, + { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } + }, { // Band 2 + { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, + { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } + }, { // Band 3 + { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } + }, { // Band 4 + { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } + }, { // Band 5 + { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, + { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } + }, { // Band 1 + { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, + { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } + }, { // Band 2 + { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 }, + { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } + }, { // Band 3 + { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, + { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } + }, { // Band 4 + { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, + { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } + }, { // Band 5 + { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, + { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } + } + }, { // Inter + { // Band 0 + { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } + }, { // Band 1 + { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, + { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } + }, { // Band 2 + { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, + { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } + }, { // Band 3 + { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, + { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } + }, { // Band 4 + { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, + { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } + }, { // Band 5 + { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, + { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } + }, { // Band 1 + { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, + { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } + }, { // Band 2 + { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, + { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } + }, { // Band 3 + { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, + { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } + }, { // Band 4 + { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, + { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } + }, { // Band 5 + { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, + { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } + } + }, { // Inter + { // Band 0 + { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } + }, { // Band 1 + { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, + { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } + }, { // Band 2 + { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, + { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 } + }, { // Band 3 + { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, + { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } + }, { // Band 4 + { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, + { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } + }, { // Band 5 + { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, + { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } + }, { // Band 1 + { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, + { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } + }, { // Band 2 + { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, + { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } + }, { // Band 3 + { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, + { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } + }, { // Band 4 + { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, + { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } + }, { // Band 5 + { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, + { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } + } + }, { // Inter + { // Band 0 + { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } + }, { // Band 1 + { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, + { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } + }, { // Band 2 + { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, + { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } + }, { // Band 3 + { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, + { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } + }, { // Band 4 + { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 }, + { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } + }, { // Band 5 + { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, + { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } + }, { // Band 1 + { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, + { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } + }, { // Band 2 + { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, + { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } + }, { // Band 3 + { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, + { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } + }, { // Band 4 + { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, + { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } + }, { // Band 5 + { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, + { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } + } + }, { // Inter + { // Band 0 + { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } + }, { // Band 1 + { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, + { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } + }, { // Band 2 + { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, + { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } + }, { // Band 3 + { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, + { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } + }, { // Band 4 + { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, + { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } + }, { // Band 5 + { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, + { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } + }, { // Band 1 + { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, + { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } + }, { // Band 2 + { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, + { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } + }, { // Band 3 + { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, + { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } + }, { // Band 4 + { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, + { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } + }, { // Band 5 + { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, + { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } + } + }, { // Inter + { // Band 0 + { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } + }, { // Band 1 + { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, + { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } + }, { // Band 2 + { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, + { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } + }, { // Band 3 + { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, + { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } + }, { // Band 4 + { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, + { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } + }, { // Band 5 + { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, + { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } + } + } + } }; static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) { @@ -403,8 +738,6 @@ void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) { extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); } -#include "vp9/common/vp9_default_coef_probs.h" - void vp9_default_coef_probs(VP9_COMMON *cm) { vp9_copy(cm->fc.coef_probs[TX_4X4], default_coef_probs_4x4); vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8); diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 941b251c3..bd037d793 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -21,30 +21,27 @@ #define DIFF_UPDATE_PROB 252 -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 14+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ -#define MAX_ENTROPY_TOKENS 12 -#define ENTROPY_NODES 11 -#define EOSB_TOKEN 127 /* Not signalled, encoder only */ - -#define INTER_MODE_CONTEXTS 7 - -extern DECLARE_ALIGNED(16, const uint8_t, - vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); - -#define DCT_EOB_MODEL_TOKEN 3 /* EOB Extra Bits 0+0 */ +// Coefficient token alphabet +#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 +#define ONE_TOKEN 1 // 1 Extra Bits 0+1 +#define TWO_TOKEN 2 // 2 Extra Bits 0+1 +#define THREE_TOKEN 3 // 3 Extra Bits 0+1 +#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 +#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 +#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 +#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 +#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 +#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 +#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 +#define EOB_TOKEN 11 // EOB Extra Bits 0+0 + +#define ENTROPY_TOKENS 12 + +#define ENTROPY_NODES 11 + +extern DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); + +#define EOB_MODEL_TOKEN 3 extern const vp9_tree_index vp9_coefmodel_tree[]; typedef struct { @@ -55,7 +52,7 @@ typedef struct { } vp9_extra_bit; // indexed by token value -extern const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS]; +extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS]; #define MAX_PROB 255 #define DCT_MAX_VALUE 16384 @@ -90,7 +87,7 @@ extern const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS]; // #define ENTROPY_STATS typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] - [MAX_ENTROPY_TOKENS]; + [ENTROPY_TOKENS]; typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; @@ -130,14 +127,14 @@ static const uint8_t *get_band_translate(TX_SIZE tx_size) { // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly -#define COEFPROB_MODELS 128 +#define COEFF_PROB_MODELS 256 #define UNCONSTRAINED_NODES 3 #define PIVOT_NODE 2 // which node is pivot #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) -extern const vp9_prob vp9_pareto8_full[256][MODEL_NODES]; +extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS] diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index ff2bc45e4..40d8ffd3d 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -353,10 +353,17 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { - if (mask_16x16_0 & 1) { - // if (mask_16x16_0 & 1) is 1, then (mask_16x16_1 & 1) is 1. - vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + if ((mask_16x16_0 | mask_16x16_1) & 1) { + if ((mask_16x16_0 & mask_16x16_1) & 1) { + vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); + } else if (mask_16x16_0 & 1) { + vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); + } else { + vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); + } } if ((mask_8x8_0 | mask_8x8_1) & 1) { @@ -432,10 +439,14 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, count = 1; if (mask & 1) { if (mask_16x16 & 1) { - // If (mask_16x16 & 1) is 1, then (mask_16x16 & 3) is 3. - vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); - count = 2; + if ((mask_16x16 & 3) == 3) { + vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 2); + count = 2; + } else { + vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); + } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 751accf02..2c410669a 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -238,8 +238,10 @@ static int get_free_fb(VP9_COMMON *cm) { } static void ref_cnt_fb(int *buf, int *idx, int new_idx) { - if (buf[*idx] > 0) - buf[*idx]--; + const int ref_index = *idx; + + if (ref_index >= 0 && buf[ref_index] > 0) + buf[ref_index]--; *idx = new_idx; diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index 22b66b57a..a367f0b5d 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -369,31 +369,22 @@ unsigned char vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = get_above_mi(xd); - const MODE_INFO *const left_mi = get_left_mi(xd); - const MB_MODE_INFO *const above_mbmi = get_above_mbmi(above_mi); - const MB_MODE_INFO *const left_mbmi = get_left_mbmi(left_mi); - const int above_in_image = above_mi != NULL; - const int left_in_image = left_mi != NULL; const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type]; - int above_context = max_tx_size; - int left_context = max_tx_size; - - if (above_in_image) - above_context = above_mbmi->skip_coeff ? max_tx_size - : above_mbmi->tx_size; - - if (left_in_image) - left_context = left_mbmi->skip_coeff ? max_tx_size - : left_mbmi->tx_size; - - if (!left_in_image) - left_context = above_context; + const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd)); + const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd)); + const int has_above = above_mbmi != NULL; + const int has_left = left_mbmi != NULL; + int above_ctx = (has_above && !above_mbmi->skip_coeff) ? above_mbmi->tx_size + : max_tx_size; + int left_ctx = (has_left && !left_mbmi->skip_coeff) ? left_mbmi->tx_size + : max_tx_size; + if (!has_left) + left_ctx = above_ctx; - if (!above_in_image) - above_context = left_context; + if (!has_above) + above_ctx = left_ctx; - return above_context + left_context > max_tx_size; + return (above_ctx + left_ctx) > max_tx_size; } int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids, diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 09a4fc826..6e54cf3fd 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -102,24 +102,14 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, // calculate the subsampled BLOCK_SIZE, but that type isn't defined for // sizes smaller than 16x16 yet. static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - BLOCK_SIZE bsize, int pred_w, int pred_h, + int bw, int bh, + int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int bwl = b_width_log2(plane_bsize); - const int bw = 4 << bwl; - const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; - const int x = 4 * (block & ((1 << bwl) - 1)); - const int y = 4 * (block >> bwl); const MODE_INFO *mi = xd->mi_8x8[0]; const int is_compound = has_second_ref(&mi->mbmi); int ref; - assert(x < bw); - assert(y < bh); - assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw); - assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh); - for (ref = 0; ref < 1 + is_compound; ++ref) { struct scale_factors *const scale = &xd->scale_factor[ref]; struct buf_2d *const pre_buf = &pd->pre[ref]; @@ -162,9 +152,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - &scaled_mv, scale, - 4 << pred_w, 4 << pred_h, ref, - &xd->subpix, xs, ys); + &scaled_mv, scale, w, h, ref, &xd->subpix, xs, ys); } } @@ -172,20 +160,26 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int plane_from, int plane_to) { int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; for (plane = plane_from; plane <= plane_to; ++plane) { - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; - const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); - for (y = 0; y < 1 << bhl; ++y) - for (x = 0; x < 1 << bwl; ++x) - build_inter_predictors(xd, plane, i++, bsize, 0, 0, mi_x, mi_y); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { - build_inter_predictors(xd, plane, 0, bsize, bwl, bhl, mi_x, mi_y); + build_inter_predictors(xd, plane, 0, bw, bh, + 0, 0, bw, bh, mi_x, mi_y); } } } @@ -208,24 +202,14 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, // TODO(jingning): This function serves as a placeholder for decoder prediction // using on demand border extension. It should be moved to /decoder/ directory. static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - BLOCK_SIZE bsize, int pred_w, int pred_h, + int bw, int bh, + int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int bwl = b_width_log2(plane_bsize); - const int bw = 4 << bwl; - const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; - const int x = 4 * (block & ((1 << bwl) - 1)); - const int y = 4 * (block >> bwl); const MODE_INFO *mi = xd->mi_8x8[0]; const int is_compound = has_second_ref(&mi->mbmi); int ref; - assert(x < bw); - assert(y < bh); - assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw); - assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh); - for (ref = 0; ref < 1 + is_compound; ++ref) { struct scale_factors *const scale = &xd->scale_factor[ref]; struct buf_2d *const pre_buf = &pd->pre[ref]; @@ -268,29 +252,33 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - &scaled_mv, scale, - 4 << pred_w, 4 << pred_h, ref, - &xd->subpix, xs, ys); + &scaled_mv, scale, w, h, ref, &xd->subpix, xs, ys); } } void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; - const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); - for (y = 0; y < 1 << bhl; ++y) - for (x = 0; x < 1 << bwl; ++x) - dec_build_inter_predictors(xd, plane, i++, bsize, 0, 0, mi_x, mi_y); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + dec_build_inter_predictors(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { - dec_build_inter_predictors(xd, plane, 0, bsize, bwl, bhl, mi_x, mi_y); + dec_build_inter_predictors(xd, plane, 0, bw, bh, + 0, 0, bw, bh, mi_x, mi_y); } } } diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c index e2a5b9faa..dca3076f3 100644 --- a/vp9/common/vp9_treecoder.c +++ b/vp9/common/vp9_treecoder.c @@ -14,27 +14,6 @@ #include "./vpx_config.h" #include "vp9/common/vp9_treecoder.h" -static void tree2tok(struct vp9_token *const p, vp9_tree t, - int i, int v, int l) { - v += v; - ++l; - - do { - const vp9_tree_index j = t[i++]; - - if (j <= 0) { - p[-j].value = v; - p[-j].len = l; - } else { - tree2tok(p, t, j, v, l); - } - } while (++v & 1); -} - -void vp9_tokens_from_tree(struct vp9_token *p, vp9_tree t) { - tree2tok(p, t, 0, 0, 0); -} - static unsigned int convert_distribution(unsigned int i, vp9_tree tree, unsigned int branch_ct[][2], const unsigned int num_events[]) { diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h index a79b1564a..bbe4e8f6a 100644 --- a/vp9/common/vp9_treecoder.h +++ b/vp9/common/vp9_treecoder.h @@ -34,15 +34,6 @@ typedef int8_t vp9_tree_index; typedef const vp9_tree_index vp9_tree[]; -struct vp9_token { - int value; - int len; -}; - -/* Construct encoding array from tree. */ - -void vp9_tokens_from_tree(struct vp9_token*, vp9_tree); - /* Convert array of token occurrence counts into a table of probabilities for the associated binary encoding tree. Also writes count of branches taken for each node on the tree; this facilitiates decisions as to diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 8fdf19af9..947c0ba44 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -650,6 +650,25 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); } +static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { + const __m128i zero = _mm_setzero_si128(); + const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); + const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); + const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); + const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); + + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); + + out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4); + out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); + out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); + out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); + out[4] = out[5] = out[6] = out[7] = zero; +} + static void idct8_1d_sse2(__m128i *in) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); @@ -1139,14 +1158,14 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { #define IDCT16_1D \ /* Stage2 */ \ { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in1, in15); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in1, in15); \ - const __m128i lo_9_7 = _mm_unpacklo_epi16(in9, in7); \ - const __m128i hi_9_7 = _mm_unpackhi_epi16(in9, in7); \ - const __m128i lo_5_11 = _mm_unpacklo_epi16(in5, in11); \ - const __m128i hi_5_11 = _mm_unpackhi_epi16(in5, in11); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(in13, in3); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(in13, in3); \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ + const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \ + const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ + const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ + const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ \ MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \ stg2_0, stg2_1, stg2_2, stg2_3, \ @@ -1159,10 +1178,10 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { \ /* Stage3 */ \ { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in2, in14); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in2, in14); \ - const __m128i lo_10_6 = _mm_unpacklo_epi16(in10, in6); \ - const __m128i hi_10_6 = _mm_unpackhi_epi16(in10, in6); \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ + const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ + const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ \ MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \ stg3_0, stg3_1, stg3_2, stg3_3, \ @@ -1181,10 +1200,10 @@ void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) { \ /* Stage4 */ \ { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in0, in8); \ - const __m128i lo_4_12 = _mm_unpacklo_epi16(in4, in12); \ - const __m128i hi_4_12 = _mm_unpackhi_epi16(in4, in12); \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ + const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ + const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ \ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ @@ -1296,16 +1315,7 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero, - in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero, - in10 = zero, in11 = zero, in12 = zero, in13 = zero, - in14 = zero, in15 = zero; - __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero, - l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero, - l12 = zero, l13 = zero, l14 = zero, l15 = zero; - __m128i r0 = zero, r1 = zero, r2 = zero, r3 = zero, r4 = zero, r5 = zero, - r6 = zero, r7 = zero, r8 = zero, r9 = zero, r10 = zero, r11 = zero, - r12 = zero, r13 = zero, r14 = zero, r15 = zero; + __m128i in[16], l[16], r[16], *curr1; __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0, stp1_12_0; @@ -1314,162 +1324,132 @@ void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; - // We work on a 8x16 block each time, and loop 4 times for 2-D 16x16 idct. - for (i = 0; i < 4; i++) { - // 1-D idct - if (i < 2) { - if (i == 1) input += 128; + curr1 = l; + for (i = 0; i < 2; i++) { + // 1-D idct // Load input data. - in0 = _mm_load_si128((const __m128i *)input); - in8 = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in1 = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in9 = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in2 = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in10 = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in3 = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in11 = _mm_load_si128((const __m128i *)(input + 8 * 7)); - in4 = _mm_load_si128((const __m128i *)(input + 8 * 8)); - in12 = _mm_load_si128((const __m128i *)(input + 8 * 9)); - in5 = _mm_load_si128((const __m128i *)(input + 8 * 10)); - in13 = _mm_load_si128((const __m128i *)(input + 8 * 11)); - in6 = _mm_load_si128((const __m128i *)(input + 8 * 12)); - in14 = _mm_load_si128((const __m128i *)(input + 8 * 13)); - in7 = _mm_load_si128((const __m128i *)(input + 8 * 14)); - in15 = _mm_load_si128((const __m128i *)(input + 8 * 15)); - - TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, - in4, in5, in6, in7); - TRANSPOSE_8X8(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, - in10, in11, in12, in13, in14, in15); - } + in[0] = _mm_load_si128((const __m128i *)input); + in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8)); + in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9)); + in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10)); + in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11)); + in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12)); + in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13)); + in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14)); + in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15)); - if (i == 2) { - TRANSPOSE_8X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4, - in5, in6, in7); - TRANSPOSE_8X8(r0, r1, r2, r3, r4, r5, r6, r7, in8, in9, in10, in11, in12, - in13, in14, in15); - } + array_transpose_8x8(in, in); + array_transpose_8x8(in+8, in+8); - if (i == 3) { - TRANSPOSE_8X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3, - in4, in5, in6, in7); - TRANSPOSE_8X8(r8, r9, r10, r11, r12, r13, r14, r15, in8, in9, in10, in11, - in12, in13, in14, in15); - } + IDCT16_1D + + // Stage7 + curr1[0] = _mm_add_epi16(stp2_0, stp1_15); + curr1[1] = _mm_add_epi16(stp2_1, stp1_14); + curr1[2] = _mm_add_epi16(stp2_2, stp2_13); + curr1[3] = _mm_add_epi16(stp2_3, stp2_12); + curr1[4] = _mm_add_epi16(stp2_4, stp2_11); + curr1[5] = _mm_add_epi16(stp2_5, stp2_10); + curr1[6] = _mm_add_epi16(stp2_6, stp1_9); + curr1[7] = _mm_add_epi16(stp2_7, stp1_8); + curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); + curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); + curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); + curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); + curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); + curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); + curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); + curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); + + curr1 = r; + input += 128; + } + for (i = 0; i < 2; i++) { + // 1-D idct + array_transpose_8x8(l+i*8, in); + array_transpose_8x8(r+i*8, in+8); - IDCT16_1D + IDCT16_1D - // Stage7 - if (i == 0) { - // Left 8x16 - l0 = _mm_add_epi16(stp2_0, stp1_15); - l1 = _mm_add_epi16(stp2_1, stp1_14); - l2 = _mm_add_epi16(stp2_2, stp2_13); - l3 = _mm_add_epi16(stp2_3, stp2_12); - l4 = _mm_add_epi16(stp2_4, stp2_11); - l5 = _mm_add_epi16(stp2_5, stp2_10); - l6 = _mm_add_epi16(stp2_6, stp1_9); - l7 = _mm_add_epi16(stp2_7, stp1_8); - l8 = _mm_sub_epi16(stp2_7, stp1_8); - l9 = _mm_sub_epi16(stp2_6, stp1_9); - l10 = _mm_sub_epi16(stp2_5, stp2_10); - l11 = _mm_sub_epi16(stp2_4, stp2_11); - l12 = _mm_sub_epi16(stp2_3, stp2_12); - l13 = _mm_sub_epi16(stp2_2, stp2_13); - l14 = _mm_sub_epi16(stp2_1, stp1_14); - l15 = _mm_sub_epi16(stp2_0, stp1_15); - } else if (i == 1) { - // Right 8x16 - r0 = _mm_add_epi16(stp2_0, stp1_15); - r1 = _mm_add_epi16(stp2_1, stp1_14); - r2 = _mm_add_epi16(stp2_2, stp2_13); - r3 = _mm_add_epi16(stp2_3, stp2_12); - r4 = _mm_add_epi16(stp2_4, stp2_11); - r5 = _mm_add_epi16(stp2_5, stp2_10); - r6 = _mm_add_epi16(stp2_6, stp1_9); - r7 = _mm_add_epi16(stp2_7, stp1_8); - r8 = _mm_sub_epi16(stp2_7, stp1_8); - r9 = _mm_sub_epi16(stp2_6, stp1_9); - r10 = _mm_sub_epi16(stp2_5, stp2_10); - r11 = _mm_sub_epi16(stp2_4, stp2_11); - r12 = _mm_sub_epi16(stp2_3, stp2_12); - r13 = _mm_sub_epi16(stp2_2, stp2_13); - r14 = _mm_sub_epi16(stp2_1, stp1_14); - r15 = _mm_sub_epi16(stp2_0, stp1_15); - } else { // 2-D - in0 = _mm_add_epi16(stp2_0, stp1_15); - in1 = _mm_add_epi16(stp2_1, stp1_14); - in2 = _mm_add_epi16(stp2_2, stp2_13); - in3 = _mm_add_epi16(stp2_3, stp2_12); - in4 = _mm_add_epi16(stp2_4, stp2_11); - in5 = _mm_add_epi16(stp2_5, stp2_10); - in6 = _mm_add_epi16(stp2_6, stp1_9); - in7 = _mm_add_epi16(stp2_7, stp1_8); - in8 = _mm_sub_epi16(stp2_7, stp1_8); - in9 = _mm_sub_epi16(stp2_6, stp1_9); - in10 = _mm_sub_epi16(stp2_5, stp2_10); - in11 = _mm_sub_epi16(stp2_4, stp2_11); - in12 = _mm_sub_epi16(stp2_3, stp2_12); - in13 = _mm_sub_epi16(stp2_2, stp2_13); - in14 = _mm_sub_epi16(stp2_1, stp1_14); - in15 = _mm_sub_epi16(stp2_0, stp1_15); + in[0] = _mm_add_epi16(stp2_0, stp1_15); + in[1] = _mm_add_epi16(stp2_1, stp1_14); + in[2] = _mm_add_epi16(stp2_2, stp2_13); + in[3] = _mm_add_epi16(stp2_3, stp2_12); + in[4] = _mm_add_epi16(stp2_4, stp2_11); + in[5] = _mm_add_epi16(stp2_5, stp2_10); + in[6] = _mm_add_epi16(stp2_6, stp1_9); + in[7] = _mm_add_epi16(stp2_7, stp1_8); + in[8] = _mm_sub_epi16(stp2_7, stp1_8); + in[9] = _mm_sub_epi16(stp2_6, stp1_9); + in[10] = _mm_sub_epi16(stp2_5, stp2_10); + in[11] = _mm_sub_epi16(stp2_4, stp2_11); + in[12] = _mm_sub_epi16(stp2_3, stp2_12); + in[13] = _mm_sub_epi16(stp2_2, stp2_13); + in[14] = _mm_sub_epi16(stp2_1, stp1_14); + in[15] = _mm_sub_epi16(stp2_0, stp1_15); // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - in8 = _mm_adds_epi16(in8, final_rounding); - in9 = _mm_adds_epi16(in9, final_rounding); - in10 = _mm_adds_epi16(in10, final_rounding); - in11 = _mm_adds_epi16(in11, final_rounding); - in12 = _mm_adds_epi16(in12, final_rounding); - in13 = _mm_adds_epi16(in13, final_rounding); - in14 = _mm_adds_epi16(in14, final_rounding); - in15 = _mm_adds_epi16(in15, final_rounding); - - in0 = _mm_srai_epi16(in0, 6); - in1 = _mm_srai_epi16(in1, 6); - in2 = _mm_srai_epi16(in2, 6); - in3 = _mm_srai_epi16(in3, 6); - in4 = _mm_srai_epi16(in4, 6); - in5 = _mm_srai_epi16(in5, 6); - in6 = _mm_srai_epi16(in6, 6); - in7 = _mm_srai_epi16(in7, 6); - in8 = _mm_srai_epi16(in8, 6); - in9 = _mm_srai_epi16(in9, 6); - in10 = _mm_srai_epi16(in10, 6); - in11 = _mm_srai_epi16(in11, 6); - in12 = _mm_srai_epi16(in12, 6); - in13 = _mm_srai_epi16(in13, 6); - in14 = _mm_srai_epi16(in14, 6); - in15 = _mm_srai_epi16(in15, 6); - - RECON_AND_STORE(dest, in0); - RECON_AND_STORE(dest, in1); - RECON_AND_STORE(dest, in2); - RECON_AND_STORE(dest, in3); - RECON_AND_STORE(dest, in4); - RECON_AND_STORE(dest, in5); - RECON_AND_STORE(dest, in6); - RECON_AND_STORE(dest, in7); - RECON_AND_STORE(dest, in8); - RECON_AND_STORE(dest, in9); - RECON_AND_STORE(dest, in10); - RECON_AND_STORE(dest, in11); - RECON_AND_STORE(dest, in12); - RECON_AND_STORE(dest, in13); - RECON_AND_STORE(dest, in14); - RECON_AND_STORE(dest, in15); + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + in[8] = _mm_adds_epi16(in[8], final_rounding); + in[9] = _mm_adds_epi16(in[9], final_rounding); + in[10] = _mm_adds_epi16(in[10], final_rounding); + in[11] = _mm_adds_epi16(in[11], final_rounding); + in[12] = _mm_adds_epi16(in[12], final_rounding); + in[13] = _mm_adds_epi16(in[13], final_rounding); + in[14] = _mm_adds_epi16(in[14], final_rounding); + in[15] = _mm_adds_epi16(in[15], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 6); + in[1] = _mm_srai_epi16(in[1], 6); + in[2] = _mm_srai_epi16(in[2], 6); + in[3] = _mm_srai_epi16(in[3], 6); + in[4] = _mm_srai_epi16(in[4], 6); + in[5] = _mm_srai_epi16(in[5], 6); + in[6] = _mm_srai_epi16(in[6], 6); + in[7] = _mm_srai_epi16(in[7], 6); + in[8] = _mm_srai_epi16(in[8], 6); + in[9] = _mm_srai_epi16(in[9], 6); + in[10] = _mm_srai_epi16(in[10], 6); + in[11] = _mm_srai_epi16(in[11], 6); + in[12] = _mm_srai_epi16(in[12], 6); + in[13] = _mm_srai_epi16(in[13], 6); + in[14] = _mm_srai_epi16(in[14], 6); + in[15] = _mm_srai_epi16(in[15], 6); + + RECON_AND_STORE(dest, in[0]); + RECON_AND_STORE(dest, in[1]); + RECON_AND_STORE(dest, in[2]); + RECON_AND_STORE(dest, in[3]); + RECON_AND_STORE(dest, in[4]); + RECON_AND_STORE(dest, in[5]); + RECON_AND_STORE(dest, in[6]); + RECON_AND_STORE(dest, in[7]); + RECON_AND_STORE(dest, in[8]); + RECON_AND_STORE(dest, in[9]); + RECON_AND_STORE(dest, in[10]); + RECON_AND_STORE(dest, in[11]); + RECON_AND_STORE(dest, in[12]); + RECON_AND_STORE(dest, in[13]); + RECON_AND_STORE(dest, in[14]); + RECON_AND_STORE(dest, in[15]); dest += 8 - (stride * 16); - } } } @@ -2489,15 +2469,7 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero, - in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero, - in10 = zero, in11 = zero, in12 = zero, in13 = zero, - in14 = zero, in15 = zero; - __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero, - l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero, - l12 = zero, l13 = zero, l14 = zero, l15 = zero; - + __m128i in[16], l[16]; __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0, stp1_12_0; @@ -2505,25 +2477,26 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; + in[4] = in[5] = in[6] = in[7] = in[12] = in[13] = in[14] = in[15] = zero; // 1-D idct. Load input data. - in0 = _mm_load_si128((const __m128i *)input); - in8 = _mm_load_si128((const __m128i *)(input + 8 * 1)); - in1 = _mm_load_si128((const __m128i *)(input + 8 * 2)); - in9 = _mm_load_si128((const __m128i *)(input + 8 * 3)); - in2 = _mm_load_si128((const __m128i *)(input + 8 * 4)); - in10 = _mm_load_si128((const __m128i *)(input + 8 * 5)); - in3 = _mm_load_si128((const __m128i *)(input + 8 * 6)); - in11 = _mm_load_si128((const __m128i *)(input + 8 * 7)); + in[0] = _mm_load_si128((const __m128i *)input); + in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1)); + in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2)); + in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3)); + in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4)); + in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5)); + in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6)); + in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7)); - TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3); - TRANSPOSE_8X4(in8, in9, in10, in11, in8, in9, in10, in11); + TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1], in[2], in[3]); + TRANSPOSE_8X4(in[8], in[9], in[10], in[11], in[8], in[9], in[10], in[11]); // Stage2 { - const __m128i lo_1_15 = _mm_unpackhi_epi16(in0, in11); - const __m128i lo_9_7 = _mm_unpackhi_epi16(in8, in3); - const __m128i lo_5_11 = _mm_unpackhi_epi16(in2, in9); - const __m128i lo_13_3 = _mm_unpackhi_epi16(in10, in1); + const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], in[11]); + const __m128i lo_9_7 = _mm_unpackhi_epi16(in[8], in[3]); + const __m128i lo_5_11 = _mm_unpackhi_epi16(in[2], in[9]); + const __m128i lo_13_3 = _mm_unpackhi_epi16(in[10], in[1]); tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); @@ -2565,8 +2538,8 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, // Stage3 { - const __m128i lo_2_14 = _mm_unpacklo_epi16(in1, in11); - const __m128i lo_10_6 = _mm_unpacklo_epi16(in9, in3); + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], in[11]); + const __m128i lo_10_6 = _mm_unpacklo_epi16(in[9], in[3]); tmp0 = _mm_madd_epi16(lo_2_14, stg3_0); tmp2 = _mm_madd_epi16(lo_2_14, stg3_1); @@ -2601,8 +2574,8 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, // Stage4 { - const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8); - const __m128i lo_4_12 = _mm_unpacklo_epi16(in2, in10); + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); + const __m128i lo_4_12 = _mm_unpacklo_epi16(in[2], in[10]); const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); @@ -2711,106 +2684,99 @@ void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, } // Stage7. Left 8x16 only. - l0 = _mm_add_epi16(stp2_0, stp1_15); - l1 = _mm_add_epi16(stp2_1, stp1_14); - l2 = _mm_add_epi16(stp2_2, stp2_13); - l3 = _mm_add_epi16(stp2_3, stp2_12); - l4 = _mm_add_epi16(stp2_4, stp2_11); - l5 = _mm_add_epi16(stp2_5, stp2_10); - l6 = _mm_add_epi16(stp2_6, stp1_9); - l7 = _mm_add_epi16(stp2_7, stp1_8); - l8 = _mm_sub_epi16(stp2_7, stp1_8); - l9 = _mm_sub_epi16(stp2_6, stp1_9); - l10 = _mm_sub_epi16(stp2_5, stp2_10); - l11 = _mm_sub_epi16(stp2_4, stp2_11); - l12 = _mm_sub_epi16(stp2_3, stp2_12); - l13 = _mm_sub_epi16(stp2_2, stp2_13); - l14 = _mm_sub_epi16(stp2_1, stp1_14); - l15 = _mm_sub_epi16(stp2_0, stp1_15); + l[0] = _mm_add_epi16(stp2_0, stp1_15); + l[1] = _mm_add_epi16(stp2_1, stp1_14); + l[2] = _mm_add_epi16(stp2_2, stp2_13); + l[3] = _mm_add_epi16(stp2_3, stp2_12); + l[4] = _mm_add_epi16(stp2_4, stp2_11); + l[5] = _mm_add_epi16(stp2_5, stp2_10); + l[6] = _mm_add_epi16(stp2_6, stp1_9); + l[7] = _mm_add_epi16(stp2_7, stp1_8); + l[8] = _mm_sub_epi16(stp2_7, stp1_8); + l[9] = _mm_sub_epi16(stp2_6, stp1_9); + l[10] = _mm_sub_epi16(stp2_5, stp2_10); + l[11] = _mm_sub_epi16(stp2_4, stp2_11); + l[12] = _mm_sub_epi16(stp2_3, stp2_12); + l[13] = _mm_sub_epi16(stp2_2, stp2_13); + l[14] = _mm_sub_epi16(stp2_1, stp1_14); + l[15] = _mm_sub_epi16(stp2_0, stp1_15); // 2-D idct. We do 2 8x16 blocks. for (i = 0; i < 2; i++) { - if (i == 0) - TRANSPOSE_4X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4, - in5, in6, in7); - - if (i == 1) - TRANSPOSE_4X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3, - in4, in5, in6, in7); - - in8 = in9 = in10 = in11 = in12 = in13 = in14 = in15 = zero; + array_transpose_4X8(l + 8*i, in); + in[8] = in[9] = in[10] = in[11] = in[12] = in[13] = in[14] = in[15] = zero; IDCT16_1D // Stage7 - in0 = _mm_add_epi16(stp2_0, stp1_15); - in1 = _mm_add_epi16(stp2_1, stp1_14); - in2 = _mm_add_epi16(stp2_2, stp2_13); - in3 = _mm_add_epi16(stp2_3, stp2_12); - in4 = _mm_add_epi16(stp2_4, stp2_11); - in5 = _mm_add_epi16(stp2_5, stp2_10); - in6 = _mm_add_epi16(stp2_6, stp1_9); - in7 = _mm_add_epi16(stp2_7, stp1_8); - in8 = _mm_sub_epi16(stp2_7, stp1_8); - in9 = _mm_sub_epi16(stp2_6, stp1_9); - in10 = _mm_sub_epi16(stp2_5, stp2_10); - in11 = _mm_sub_epi16(stp2_4, stp2_11); - in12 = _mm_sub_epi16(stp2_3, stp2_12); - in13 = _mm_sub_epi16(stp2_2, stp2_13); - in14 = _mm_sub_epi16(stp2_1, stp1_14); - in15 = _mm_sub_epi16(stp2_0, stp1_15); + in[0] = _mm_add_epi16(stp2_0, stp1_15); + in[1] = _mm_add_epi16(stp2_1, stp1_14); + in[2] = _mm_add_epi16(stp2_2, stp2_13); + in[3] = _mm_add_epi16(stp2_3, stp2_12); + in[4] = _mm_add_epi16(stp2_4, stp2_11); + in[5] = _mm_add_epi16(stp2_5, stp2_10); + in[6] = _mm_add_epi16(stp2_6, stp1_9); + in[7] = _mm_add_epi16(stp2_7, stp1_8); + in[8] = _mm_sub_epi16(stp2_7, stp1_8); + in[9] = _mm_sub_epi16(stp2_6, stp1_9); + in[10] = _mm_sub_epi16(stp2_5, stp2_10); + in[11] = _mm_sub_epi16(stp2_4, stp2_11); + in[12] = _mm_sub_epi16(stp2_3, stp2_12); + in[13] = _mm_sub_epi16(stp2_2, stp2_13); + in[14] = _mm_sub_epi16(stp2_1, stp1_14); + in[15] = _mm_sub_epi16(stp2_0, stp1_15); // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - in8 = _mm_adds_epi16(in8, final_rounding); - in9 = _mm_adds_epi16(in9, final_rounding); - in10 = _mm_adds_epi16(in10, final_rounding); - in11 = _mm_adds_epi16(in11, final_rounding); - in12 = _mm_adds_epi16(in12, final_rounding); - in13 = _mm_adds_epi16(in13, final_rounding); - in14 = _mm_adds_epi16(in14, final_rounding); - in15 = _mm_adds_epi16(in15, final_rounding); - - in0 = _mm_srai_epi16(in0, 6); - in1 = _mm_srai_epi16(in1, 6); - in2 = _mm_srai_epi16(in2, 6); - in3 = _mm_srai_epi16(in3, 6); - in4 = _mm_srai_epi16(in4, 6); - in5 = _mm_srai_epi16(in5, 6); - in6 = _mm_srai_epi16(in6, 6); - in7 = _mm_srai_epi16(in7, 6); - in8 = _mm_srai_epi16(in8, 6); - in9 = _mm_srai_epi16(in9, 6); - in10 = _mm_srai_epi16(in10, 6); - in11 = _mm_srai_epi16(in11, 6); - in12 = _mm_srai_epi16(in12, 6); - in13 = _mm_srai_epi16(in13, 6); - in14 = _mm_srai_epi16(in14, 6); - in15 = _mm_srai_epi16(in15, 6); - - RECON_AND_STORE(dest, in0); - RECON_AND_STORE(dest, in1); - RECON_AND_STORE(dest, in2); - RECON_AND_STORE(dest, in3); - RECON_AND_STORE(dest, in4); - RECON_AND_STORE(dest, in5); - RECON_AND_STORE(dest, in6); - RECON_AND_STORE(dest, in7); - RECON_AND_STORE(dest, in8); - RECON_AND_STORE(dest, in9); - RECON_AND_STORE(dest, in10); - RECON_AND_STORE(dest, in11); - RECON_AND_STORE(dest, in12); - RECON_AND_STORE(dest, in13); - RECON_AND_STORE(dest, in14); - RECON_AND_STORE(dest, in15); + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + in[8] = _mm_adds_epi16(in[8], final_rounding); + in[9] = _mm_adds_epi16(in[9], final_rounding); + in[10] = _mm_adds_epi16(in[10], final_rounding); + in[11] = _mm_adds_epi16(in[11], final_rounding); + in[12] = _mm_adds_epi16(in[12], final_rounding); + in[13] = _mm_adds_epi16(in[13], final_rounding); + in[14] = _mm_adds_epi16(in[14], final_rounding); + in[15] = _mm_adds_epi16(in[15], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 6); + in[1] = _mm_srai_epi16(in[1], 6); + in[2] = _mm_srai_epi16(in[2], 6); + in[3] = _mm_srai_epi16(in[3], 6); + in[4] = _mm_srai_epi16(in[4], 6); + in[5] = _mm_srai_epi16(in[5], 6); + in[6] = _mm_srai_epi16(in[6], 6); + in[7] = _mm_srai_epi16(in[7], 6); + in[8] = _mm_srai_epi16(in[8], 6); + in[9] = _mm_srai_epi16(in[9], 6); + in[10] = _mm_srai_epi16(in[10], 6); + in[11] = _mm_srai_epi16(in[11], 6); + in[12] = _mm_srai_epi16(in[12], 6); + in[13] = _mm_srai_epi16(in[13], 6); + in[14] = _mm_srai_epi16(in[14], 6); + in[15] = _mm_srai_epi16(in[15], 6); + + RECON_AND_STORE(dest, in[0]); + RECON_AND_STORE(dest, in[1]); + RECON_AND_STORE(dest, in[2]); + RECON_AND_STORE(dest, in[3]); + RECON_AND_STORE(dest, in[4]); + RECON_AND_STORE(dest, in[5]); + RECON_AND_STORE(dest, in[6]); + RECON_AND_STORE(dest, in[7]); + RECON_AND_STORE(dest, in[8]); + RECON_AND_STORE(dest, in[9]); + RECON_AND_STORE(dest, in[10]); + RECON_AND_STORE(dest, in[11]); + RECON_AND_STORE(dest, in[12]); + RECON_AND_STORE(dest, in[13]); + RECON_AND_STORE(dest, in[14]); + RECON_AND_STORE(dest, in[15]); dest += 8 - (stride * 16); } diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index bcd51f5f8..516aa88cb 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -41,16 +41,14 @@ typedef struct TileWorkerData { VP9_COMMON *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, MACROBLOCKD, xd); - DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); } TileWorkerData; static int read_be32(const uint8_t *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } -static int is_compound_prediction_allowed(const VP9_COMMON *cm) { +static int is_compound_reference_allowed(const VP9_COMMON *cm) { int i; for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i) if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) @@ -59,7 +57,7 @@ static int is_compound_prediction_allowed(const VP9_COMMON *cm) { return 0; } -static void setup_compound_prediction(VP9_COMMON *cm) { +static void setup_compound_reference(VP9_COMMON *cm) { if (cm->ref_frame_sign_bias[LAST_FRAME] == cm->ref_frame_sign_bias[GOLDEN_FRAME]) { cm->comp_fixed_ref = ALTREF_FRAME; @@ -94,7 +92,7 @@ static TX_MODE read_tx_mode(vp9_reader *r) { return tx_mode; } -static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { +static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) { int i, j; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) @@ -124,22 +122,20 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); } -static INLINE REFERENCE_MODE read_comp_pred_mode(vp9_reader *r) { - REFERENCE_MODE mode = vp9_read_bit(r); - if (mode) - mode += vp9_read_bit(r); - return mode; +static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, vp9_reader *r) { + if (is_compound_reference_allowed(cm)) { + REFERENCE_MODE mode = vp9_read_bit(r); + if (mode) + mode += vp9_read_bit(r); + setup_compound_reference(cm); + return mode; + } else { + return SINGLE_REFERENCE; + } } -static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { +static void read_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) { int i; - - const int compound_allowed = is_compound_prediction_allowed(cm); - cm->comp_pred_mode = compound_allowed ? read_comp_pred_mode(r) - : SINGLE_REFERENCE; - if (compound_allowed) - setup_compound_prediction(cm); - if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); @@ -242,9 +238,8 @@ static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, TX_SIZE tx_size, uint8_t *dst, int stride, - uint8_t *token_cache) { + int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const int eob = pd->eobs[block]; if (eob > 0) { TX_TYPE tx_type; const int plane_type = pd->plane_type; @@ -275,20 +270,13 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, if (eob == 1) { vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); - vpx_memset(token_cache, 0, 2 * sizeof(token_cache[0])); } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - vpx_memset(token_cache, 0, - 4 * (4 << tx_size) * sizeof(token_cache[0])); - } else if (tx_size == TX_32X32 && eob <= 34) { + else if (tx_size == TX_32X32 && eob <= 34) vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - vpx_memset(token_cache, 0, 256 * sizeof(token_cache[0])); - } else { + else vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - vpx_memset(token_cache, 0, - (16 << (tx_size << 1)) * sizeof(token_cache[0])); - } } } } @@ -297,7 +285,6 @@ struct intra_args { VP9_COMMON *cm; MACROBLOCKD *xd; vp9_reader *r; - uint8_t *token_cache; }; static void predict_and_reconstruct_intra_block(int plane, int block, @@ -325,10 +312,11 @@ static void predict_and_reconstruct_intra_block(int plane, int block, dst, pd->dst.stride, dst, pd->dst.stride); if (!mi->mbmi.skip_coeff) { - vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size, - args->r, args->token_cache); + const int eob = vp9_decode_block_tokens(cm, xd, plane, block, + plane_bsize, x, y, tx_size, + args->r); inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride, - args->token_cache); + eob); } } @@ -337,7 +325,6 @@ struct inter_args { MACROBLOCKD *xd; vp9_reader *r; int *eobtotal; - uint8_t *token_cache; }; static void reconstruct_inter_block(int plane, int block, @@ -347,14 +334,14 @@ static void reconstruct_inter_block(int plane, int block, VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - int x, y; + int x, y, eob; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); - *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block, - plane_bsize, x, y, tx_size, - args->r, args->token_cache); + eob = vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, + tx_size, args->r); inverse_transform_block(xd, plane, block, tx_size, &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], - pd->dst.stride, args->token_cache); + pd->dst.stride, eob); + *args->eobtotal += eob; } static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, @@ -404,8 +391,7 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, - vp9_reader *r, BLOCK_SIZE bsize, - uint8_t *token_cache) { + vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; @@ -427,9 +413,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, } if (!is_inter_block(mbmi)) { - struct intra_args arg = { - cm, xd, r, token_cache - }; + struct intra_args arg = { cm, xd, r }; foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, &arg); } else { @@ -447,9 +431,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, // Reconstruction if (!mbmi->skip_coeff) { int eobtotal = 0; - struct inter_args arg = { - cm, xd, r, &eobtotal, token_cache - }; + struct inter_args arg = { cm, xd, r, &eobtotal }; foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); if (!less8x8 && eobtotal == 0) mbmi->skip_coeff = 1; // skip loopfilter @@ -488,8 +470,7 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, - vp9_reader* r, BLOCK_SIZE bsize, - uint8_t *token_cache) { + vp9_reader* r, BLOCK_SIZE bsize) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize; @@ -500,33 +481,27 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_row + hbs < cm->mi_rows) - decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, - token_cache); + decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, - token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize, - token_cache); - decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, - token_cache); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, - token_cache); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize, - token_cache); + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(!"Invalid partition type"); @@ -712,12 +687,6 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { if (cm->width != width || cm->height != height) { // Change in frame size. - if (cm->width == 0 || cm->height == 0) { - // Assign new frame buffer on first call. - cm->new_fb_idx = NUM_YV12_BUFFERS - 1; - cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1; - } - // TODO(agrange) Don't test width/height, check overall size. if (width > cm->width || height > cm->height) { // Rescale frame buffers only if they're not big enough already. @@ -809,8 +778,7 @@ static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64, - pbi->token_cache); + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) { @@ -952,9 +920,7 @@ static void setup_tile_macroblockd(TileWorkerData *const tile_data) { for (i = 0; i < MAX_MB_PLANE; ++i) { pd[i].dqcoeff = tile_data->dqcoeff[i]; - pd[i].eobs = tile_data->eobs[i]; vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); - vpx_memset(tile_data->token_cache, 0, sizeof(tile_data->token_cache)); } } @@ -970,8 +936,7 @@ static int tile_worker_hook(void *arg1, void *arg2) { for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { decode_modes_sb(tile_data->cm, &tile_data->xd, tile, - mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, - tile_data->token_cache); + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } return !tile_data->xd.corrupted; @@ -1215,7 +1180,7 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); if (cm->tx_mode == TX_MODE_SELECT) - read_tx_probs(&fc->tx_probs, &r); + read_tx_mode_probs(&fc->tx_probs, &r); read_coef_probs(fc, cm->tx_mode, &r); for (k = 0; k < MBSKIP_CONTEXTS; ++k) @@ -1233,7 +1198,8 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); - read_comp_pred(cm, &r); + cm->comp_pred_mode = read_reference_mode(cm, &r); + read_reference_mode_probs(cm, &r); for (j = 0; j < BLOCK_SIZE_GROUPS; j++) for (i = 0; i < INTRA_MODES - 1; ++i) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 49da1a0dc..7f68e5b2c 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -13,11 +13,8 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/decoder/vp9_detokenize.h" -#include "vp9/decoder/vp9_onyxd_int.h" #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 @@ -60,42 +57,32 @@ static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = { - ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, TWO_TOKEN, - TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, - TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN -}; - #define INCREMENT_COUNT(token) \ do { \ if (!cm->frame_parallel_decoding_mode) \ - ++coef_counts[band][pt][token_to_counttoken[token]]; \ + ++coef_counts[band][ctx][token]; \ } while (0) - #define WRITE_COEF_CONTINUE(val, token) \ { \ - v = (val * dqv) >> dq_shift; \ - dqcoeff_ptr[scan[c]] = (vp9_read_bit(r) ? -v : v); \ - INCREMENT_COUNT(token); \ + v = (val * dqv) >> dq_shift; \ + dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v; \ token_cache[scan[c]] = vp9_pt_energy_class[token]; \ ++c; \ - pt = get_coef_context(nb, token_cache, c); \ - dqv = dq[1]; \ + ctx = get_coef_context(nb, token_cache, c); \ + dqv = dq[1]; \ continue; \ } - #define ADJUST_COEF(prob, bits_count) \ do { \ val += (vp9_read(r, prob) << bits_count); \ } while (0) -static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, - vp9_reader *r, int block_idx, - PLANE_TYPE type, int max_eob, int16_t *dqcoeff_ptr, - TX_SIZE tx_size, const int16_t *dq, int pt, - uint8_t *token_cache) { +static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int block, + PLANE_TYPE type, int16_t *dqcoeff, TX_SIZE tx_size, + const int16_t *dq, int ctx, vp9_reader *r) { + const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); @@ -107,10 +94,11 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, counts->coef[tx_size][type][ref]; unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] = counts->eob_branch[tx_size][type][ref]; + uint8_t token_cache[32 * 32]; const uint8_t *cat6; const uint8_t *band_translate = get_band_translate(tx_size); const int dq_shift = (tx_size == TX_32X32); - const scan_order *so = get_scan(xd, tx_size, type, block_idx); + const scan_order *so = get_scan(xd, tx_size, type, block); const int16_t *scan = so->scan; const int16_t *nb = so->neighbors; int v; @@ -119,34 +107,36 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, while (c < max_eob) { int val; band = *band_translate++; - prob = coef_probs[band][pt]; + prob = coef_probs[band][ctx]; if (!cm->frame_parallel_decoding_mode) - ++eob_branch_count[band][pt]; + ++eob_branch_count[band][ctx]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) { - if (!cm->frame_parallel_decoding_mode) - ++coef_counts[band][pt][DCT_EOB_MODEL_TOKEN]; + INCREMENT_COUNT(EOB_MODEL_TOKEN); break; } while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); dqv = dq[1]; + token_cache[scan[c]] = 0; ++c; if (c >= max_eob) return c; // zero tokens at the end (no eob token) - pt = get_coef_context(nb, token_cache, c); + ctx = get_coef_context(nb, token_cache, c); band = *band_translate++; - prob = coef_probs[band][pt]; + prob = coef_probs[band][ctx]; } // ONE_CONTEXT_NODE_0_ if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) { + INCREMENT_COUNT(ONE_TOKEN); WRITE_COEF_CONTINUE(1, ONE_TOKEN); } + INCREMENT_COUNT(TWO_TOKEN); + prob = vp9_pareto8_full[prob[PIVOT_NODE] - 1]; - // LOW_VAL_CONTEXT_NODE_0_ if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) { if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) { WRITE_COEF_CONTINUE(2, TWO_TOKEN); @@ -156,35 +146,35 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, } WRITE_COEF_CONTINUE(4, FOUR_TOKEN); } - // HIGH_LOW_CONTEXT_NODE_0_ + if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) { if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) { val = CAT1_MIN_VAL; ADJUST_COEF(CAT1_PROB0, 0); - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY1); + WRITE_COEF_CONTINUE(val, CATEGORY1_TOKEN); } val = CAT2_MIN_VAL; ADJUST_COEF(CAT2_PROB1, 1); ADJUST_COEF(CAT2_PROB0, 0); - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY2); + WRITE_COEF_CONTINUE(val, CATEGORY2_TOKEN); } - // CAT_THREEFOUR_CONTEXT_NODE_0_ + if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) { if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) { val = CAT3_MIN_VAL; ADJUST_COEF(CAT3_PROB2, 2); ADJUST_COEF(CAT3_PROB1, 1); ADJUST_COEF(CAT3_PROB0, 0); - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY3); + WRITE_COEF_CONTINUE(val, CATEGORY3_TOKEN); } val = CAT4_MIN_VAL; ADJUST_COEF(CAT4_PROB3, 3); ADJUST_COEF(CAT4_PROB2, 2); ADJUST_COEF(CAT4_PROB1, 1); ADJUST_COEF(CAT4_PROB0, 0); - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY4); + WRITE_COEF_CONTINUE(val, CATEGORY4_TOKEN); } - // CAT_FIVE_CONTEXT_NODE_0_: + if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) { val = CAT5_MIN_VAL; ADJUST_COEF(CAT5_PROB4, 4); @@ -192,7 +182,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, ADJUST_COEF(CAT5_PROB2, 2); ADJUST_COEF(CAT5_PROB1, 1); ADJUST_COEF(CAT5_PROB0, 0); - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY5); + WRITE_COEF_CONTINUE(val, CATEGORY5_TOKEN); } val = 0; cat6 = cat6_prob; @@ -200,7 +190,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, val = (val << 1) | vp9_read(r, *cat6++); val += CAT6_MIN_VAL; - WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6); + WRITE_COEF_CONTINUE(val, CATEGORY6_TOKEN); } return c; @@ -208,18 +198,14 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, int plane, int block, BLOCK_SIZE plane_bsize, - int x, int y, TX_SIZE tx_size, vp9_reader *r, - uint8_t *token_cache) { + int x, int y, TX_SIZE tx_size, vp9_reader *r) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id, - tx_size); - const int pt = get_entropy_context(tx_size, pd->above_context + x, - pd->left_context + y); - const int eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob, + const int ctx = get_entropy_context(tx_size, pd->above_context + x, + pd->left_context + y); + const int eob = decode_coefs(cm, xd, block, pd->plane_type, BLOCK_OFFSET(pd->dqcoeff, block), tx_size, - pd->dequant, pt, token_cache); + pd->dequant, ctx, r); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y); - pd->eobs[block] = eob; return eob; } diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index e858a19f7..2a8807379 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -17,7 +17,6 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, int plane, int block, BLOCK_SIZE plane_bsize, - int x, int y, TX_SIZE tx_size, vp9_reader *r, - uint8_t *token_cache); + int x, int y, TX_SIZE tx_size, vp9_reader *r); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 740ad72cb..4c0cd45a9 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -112,10 +112,8 @@ static void init_macroblockd(VP9D_COMP *const pbi) { struct macroblockd_plane *const pd = xd->plane; int i; - for (i = 0; i < MAX_MB_PLANE; ++i) { + for (i = 0; i < MAX_MB_PLANE; ++i) pd[i].dqcoeff = pbi->dqcoeff[i]; - pd[i].eobs = pbi->eobs[i]; - } } VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { @@ -127,6 +125,9 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_zero(*pbi); + // Initialize the references to not point to any frame buffers. + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; vp9_remove_decompressor(pbi); diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 3d1b97b2d..e90f8923c 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -23,7 +23,6 @@ typedef struct VP9Decompressor { DECLARE_ALIGNED(16, VP9_COMMON, common); DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); VP9D_CONFIG oxcf; @@ -52,8 +51,6 @@ typedef struct VP9Decompressor { ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; PARTITION_CONTEXT *above_seg_context; - - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); } VP9D_COMP; #endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 9f79f8cdc..97717fb9c 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -49,109 +49,6 @@ vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES]; extern unsigned int active_section; #endif - -#ifdef MODE_STATS -int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES]; -int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1]; -int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2]; -int64_t switchable_interp_stats[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; - -void init_tx_count_stats() { - vp9_zero(tx_count_32x32p_stats); - vp9_zero(tx_count_16x16p_stats); - vp9_zero(tx_count_8x8p_stats); -} - -void init_switchable_interp_stats() { - vp9_zero(switchable_interp_stats); -} - -static void update_tx_count_stats(VP9_COMMON *cm) { - int i, j; - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) { - tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j]; - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES - 1; j++) { - tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j]; - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES - 2; j++) { - tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j]; - } - } -} - -static void update_switchable_interp_stats(VP9_COMMON *cm) { - int i, j; - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - for (j = 0; j < SWITCHABLE_FILTERS; ++j) - switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j]; -} - -void write_tx_count_stats() { - int i, j; - FILE *fp = fopen("tx_count.bin", "wb"); - fwrite(tx_count_32x32p_stats, sizeof(tx_count_32x32p_stats), 1, fp); - fwrite(tx_count_16x16p_stats, sizeof(tx_count_16x16p_stats), 1, fp); - fwrite(tx_count_8x8p_stats, sizeof(tx_count_8x8p_stats), 1, fp); - fclose(fp); - - printf( - "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZES] = {\n"); - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - printf(" { "); - for (j = 0; j < TX_SIZES; j++) { - printf("%"PRId64", ", tx_count_32x32p_stats[i][j]); - } - printf("},\n"); - } - printf("};\n"); - printf( - "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZES-1] = {\n"); - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - printf(" { "); - for (j = 0; j < TX_SIZES - 1; j++) { - printf("%"PRId64", ", tx_count_16x16p_stats[i][j]); - } - printf("},\n"); - } - printf("};\n"); - printf( - "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZES-2] = {\n"); - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - printf(" { "); - for (j = 0; j < TX_SIZES - 2; j++) { - printf("%"PRId64", ", tx_count_8x8p_stats[i][j]); - } - printf("},\n"); - } - printf("};\n"); -} - -void write_switchable_interp_stats() { - int i, j; - FILE *fp = fopen("switchable_interp.bin", "wb"); - fwrite(switchable_interp_stats, sizeof(switchable_interp_stats), 1, fp); - fclose(fp); - - printf( - "vp9_default_switchable_filter_count[SWITCHABLE_FILTER_CONTEXTS]" - "[SWITCHABLE_FILTERS] = {\n"); - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { - printf(" { "); - for (j = 0; j < SWITCHABLE_FILTERS; j++) { - printf("%"PRId64", ", switchable_interp_stats[i][j]); - } - printf("},\n"); - } - printf("};\n"); -} -#endif - static struct vp9_token intra_mode_encodings[INTRA_MODES]; static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS]; static struct vp9_token partition_encodings[PARTITION_TYPES]; @@ -279,7 +176,7 @@ static void pack_mb_tokens(vp9_writer* const w, // is split into two treed writes. The first treed write takes care of the // unconstrained nodes. The second treed write takes care of the // constrained nodes. - if (t >= TWO_TOKEN && t < DCT_EOB_TOKEN) { + if (t >= TWO_TOKEN && t < EOB_TOKEN) { int len = UNCONSTRAINED_NODES - p->skip_eob_node; int bits = v >> (n - len); treed_write(w, vp9_coef_tree, p->context_tree, bits, len, i); @@ -679,10 +576,10 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { #ifdef ENTROPY_STATS if (!cpi->dummy_packing) { int t; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + for (t = 0; t < ENTROPY_TOKENS; ++t) context_counters[tx_size][i][j][k][l][t] += coef_counts[i][j][k][l][t]; - context_counters[tx_size][i][j][k][l][MAX_ENTROPY_TOKENS] += + context_counters[tx_size][i][j][k][l][ENTROPY_TOKENS] += eob_branch_ct[i][j][k][l]; } #endif diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 71f7e7a52..a34a92917 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -71,6 +71,7 @@ struct macroblock_plane { DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); int16_t *qcoeff; int16_t *coeff; + uint16_t *eobs; struct buf_2d src; // Quantizer setings @@ -86,7 +87,7 @@ struct macroblock_plane { /* The [2] dimension is for whether we skip the EOB node (i.e. if previous * coefficient in this block was zero) or not. */ typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + [PREV_COEF_CONTEXTS][ENTROPY_TOKENS]; typedef struct macroblock MACROBLOCK; struct macroblock { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 89da78190..bc71d0259 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -441,14 +441,14 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - pd[i].eobs = ctx->eobs_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; } for (i = max_plane; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][2]; p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; - pd[i].eobs = ctx->eobs_pbuf[i][2]; + p[i].eobs = ctx->eobs_pbuf[i][2]; } // Restore the coding context of the MB to that that was in place @@ -677,7 +677,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, p[i].coeff = ctx->coeff_pbuf[i][0]; p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; - pd[i].eobs = ctx->eobs_pbuf[i][0]; + p[i].eobs = ctx->eobs_pbuf[i][0]; } ctx->is_coded = 0; x->skip_recode = 0; @@ -2592,27 +2592,19 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, context, &cm->counts.tx)[mbmi->tx_size]; } else { int x, y; - TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode]; - assert(sizeof(tx_mode_to_biggest_tx_size) / - sizeof(tx_mode_to_biggest_tx_size[0]) == TX_MODES); + TX_SIZE tx_size; // The new intra coding scheme requires no change of transform size if (is_inter_block(&mi->mbmi)) { - if (sz == TX_32X32 && bsize < BLOCK_32X32) - sz = TX_16X16; - if (sz == TX_16X16 && bsize < BLOCK_16X16) - sz = TX_8X8; - if (sz == TX_8X8 && bsize < BLOCK_8X8) - sz = TX_4X4; - } else if (bsize >= BLOCK_8X8) { - sz = mbmi->tx_size; + tx_size = MIN(tx_mode_to_biggest_tx_size[cm->tx_mode], + max_txsize_lookup[bsize]); } else { - sz = TX_4X4; + tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4; } for (y = 0; y < mi_height; y++) for (x = 0; x < mi_width; x++) if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) - mi_8x8[mis * y + x]->mbmi.tx_size = sz; + mi_8x8[mis * y + x]->mbmi.tx_size = tx_size; } } } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 83f87b066..0821c263a 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -144,7 +144,7 @@ static void optimize_b(MACROBLOCK *mb, const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); int16_t *qcoeff_ptr; int16_t *dqcoeff_ptr; - int eob = pd->eobs[block], final_eob, sz = 0; + int eob = p->eobs[block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; @@ -176,7 +176,7 @@ static void optimize_b(MACROBLOCK *mb, tokens[eob][0].rate = 0; tokens[eob][0].error = 0; tokens[eob][0].next = default_eob; - tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].token = EOB_TOKEN; tokens[eob][0].qc = 0; *(tokens[eob] + 1) = *(tokens[eob] + 0); next = eob; @@ -243,21 +243,19 @@ static void optimize_b(MACROBLOCK *mb, /* If we reduced this coefficient to zero, check to see if * we need to move the EOB back here. */ - t0 = tokens[next][0].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; - t1 = tokens[next][1].token == DCT_EOB_TOKEN ? - DCT_EOB_TOKEN : ZERO_TOKEN; + t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; + t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; } else { t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; } if (next < default_eob) { band = band_translate[i + 1]; - if (t0 != DCT_EOB_TOKEN) { + if (t0 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] [tokens[next][0].token]; } - if (t1 != DCT_EOB_TOKEN) { + if (t1 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] [tokens[next][1].token]; @@ -289,12 +287,12 @@ static void optimize_b(MACROBLOCK *mb, t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ - if (t0 != DCT_EOB_TOKEN) { + if (t0 != EOB_TOKEN) { tokens[next][0].rate += mb->token_costs[tx_size][type][ref][band][1][0][t0]; tokens[next][0].token = ZERO_TOKEN; } - if (t1 != DCT_EOB_TOKEN) { + if (t1 != EOB_TOKEN) { tokens[next][1].rate += mb->token_costs[tx_size][type][ref][band][1][0][t1]; tokens[next][1].token = ZERO_TOKEN; @@ -334,7 +332,7 @@ static void optimize_b(MACROBLOCK *mb, } final_eob++; - xd->plane[plane].eobs[block] = final_eob; + mb->plane[plane].eobs[block] = final_eob; *a = *l = (final_eob > 0); } @@ -372,7 +370,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const scan_order *so; - uint16_t *eob = &pd->eobs[block]; + uint16_t *eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; int i, j; int16_t *src_diff; @@ -423,6 +421,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx *const ctx = args->ctx; + struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); int i, j; @@ -433,7 +432,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. if (x->zcoeff_blk[tx_size][block] && plane == 0) { - pd->eobs[block] = 0; + p->eobs[block] = 0; ctx->ta[plane][i] = 0; ctx->tl[plane][j] = 0; return; @@ -445,28 +444,28 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); } else { - ctx->ta[plane][i] = pd->eobs[block] > 0; - ctx->tl[plane][j] = pd->eobs[block] > 0; + ctx->ta[plane][i] = p->eobs[block] > 0; + ctx->tl[plane][j] = p->eobs[block] > 0; } - if (x->skip_encode || pd->eobs[block] == 0) + if (x->skip_encode || p->eobs[block] == 0) return; switch (tx_size) { case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); + vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); + vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); + vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; case TX_4X4: // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); + xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; default: assert(!"Invalid transform size"); @@ -478,6 +477,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args *const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); int i, j; @@ -487,10 +487,10 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); - if (pd->eobs[block] == 0) + if (p->eobs[block] == 0) return; - xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); + xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -540,7 +540,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, const int diff_stride = 4 * (1 << bwl); uint8_t *src, *dst; int16_t *src_diff; - uint16_t *eob = &pd->eobs[block]; + uint16_t *eob = &p->eobs[block]; int i, j; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)]; @@ -559,8 +559,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; block >>= 6; vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, - dst, pd->dst.stride, dst, pd->dst.stride); - + x->skip_encode ? src : dst, + x->skip_encode ? p->src.stride : pd->dst.stride, + dst, pd->dst.stride); if (!x->skip_recode) { vp9_subtract_block(32, 32, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -582,7 +583,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; block >>= 4; vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, - dst, pd->dst.stride, dst, pd->dst.stride); + x->skip_encode ? src : dst, + x->skip_encode ? p->src.stride : pd->dst.stride, + dst, pd->dst.stride); if (!x->skip_recode) { vp9_subtract_block(16, 16, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -600,7 +603,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; block >>= 2; vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, - dst, pd->dst.stride, dst, pd->dst.stride); + x->skip_encode ? src : dst, + x->skip_encode ? p->src.stride : pd->dst.stride, + dst, pd->dst.stride); if (!x->skip_recode) { vp9_subtract_block(8, 8, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -621,7 +626,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, - dst, pd->dst.stride, dst, pd->dst.stride); + x->skip_encode ? src : dst, + x->skip_encode ? p->src.stride : pd->dst.stride, + dst, pd->dst.stride); if (!x->skip_recode) { vp9_subtract_block(4, 4, src_diff, diff_stride, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 5ec0b765a..60830d00c 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -545,7 +545,7 @@ void vp9_first_pass(VP9_COMP *cpi) { p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - pd[i].eobs = ctx->eobs_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; } x->skip_recode = 0; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index e724ecaa6..4632b0758 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -160,6 +160,7 @@ void vp9_initialize_enc() { if (!init_done) { vp9_initialize_common(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); vp9_init_quant_tables(); vp9_init_me_luts(); @@ -167,7 +168,6 @@ void vp9_initialize_enc() { // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); - vp9_coef_tree_initialize(); init_done = 1; } } @@ -2588,9 +2588,9 @@ static void full_to_model_count(unsigned int *model_count, model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN]; model_count[ONE_TOKEN] = full_count[ONE_TOKEN]; model_count[TWO_TOKEN] = full_count[TWO_TOKEN]; - for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n) + for (n = THREE_TOKEN; n < EOB_TOKEN; ++n) model_count[TWO_TOKEN] += full_count[n]; - model_count[DCT_EOB_MODEL_TOKEN] = full_count[DCT_EOB_TOKEN]; + model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN]; } static void full_to_model_counts( @@ -3270,13 +3270,12 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, } static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; if (!cpi->initial_width) { - // TODO(jkoleszar): Support 1/4 subsampling? - cm->subsampling_x = (sd != NULL) && sd->uv_width < sd->y_width; - cm->subsampling_y = (sd != NULL) && sd->uv_height < sd->y_height; + // TODO(agrange) Subsampling defaults to assuming sampled chroma. + cm->subsampling_x = sd != NULL ? (sd->uv_width < sd->y_width) : 1; + cm->subsampling_y = sd != NULL ? (sd->uv_height < sd->y_height) : 1; alloc_raw_frame_buffers(cpi); - cpi->initial_width = cm->width; cpi->initial_height = cm->height; } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 698130aed..8c4172421 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -137,45 +137,18 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = eob + 1; } -struct plane_block_idx { - int plane; - int block; -}; - -// TODO(jkoleszar): returning a struct so it can be used in a const context, -// expect to refactor this further later. -static INLINE struct plane_block_idx plane_block_idx(int y_blocks, - int b_idx) { - const int v_offset = y_blocks * 5 / 4; - struct plane_block_idx res; - - if (b_idx < y_blocks) { - res.plane = 0; - res.block = b_idx; - } else if (b_idx < v_offset) { - res.plane = 1; - res.block = b_idx - y_blocks; - } else { - assert(b_idx < y_blocks * 3 / 2); - res.plane = 2; - res.block = b_idx - v_offset; - } - return res; -} - -void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx, +void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan) { MACROBLOCKD *const xd = &x->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - struct macroblock_plane* p = &x->plane[pb_idx.plane]; - struct macroblockd_plane* pd = &xd->plane[pb_idx.plane]; + struct macroblock_plane* p = &x->plane[plane]; + struct macroblockd_plane* pd = &xd->plane[plane]; - vp9_quantize_b(BLOCK_OFFSET(p->coeff, pb_idx.block), + vp9_quantize_b(BLOCK_OFFSET(p->coeff, block), 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, - BLOCK_OFFSET(p->qcoeff, pb_idx.block), - BLOCK_OFFSET(pd->dqcoeff, pb_idx.block), - pd->dequant, p->zbin_extra, &pd->eobs[pb_idx.block], scan, iscan); + BLOCK_OFFSET(p->qcoeff, block), + BLOCK_OFFSET(pd->dqcoeff, block), + pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan); } static void invert_quant(int16_t *quant, int16_t *shift, int d) { diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index c078e1d41..41cfa5283 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -13,7 +13,7 @@ #include "vp9/encoder/vp9_block.h" -void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx, +void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 84b71224e..71a3650fe 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -165,8 +165,8 @@ static void fill_token_costs(vp9_coeff_cost *c, vp9_coef_tree); vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, vp9_coef_tree); - assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] == - c[t][i][j][k][1][l][DCT_EOB_TOKEN]); + assert(c[t][i][j][k][0][l][EOB_TOKEN] == + c[t][i][j][k][1][l][EOB_TOKEN]); } } @@ -283,7 +283,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? - 0 : 1; + 0 : 1; set_block_thresholds(cpi); @@ -525,10 +525,10 @@ static INLINE int cost_coeffs(MACROBLOCK *x, struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; const int16_t *band_count = &band_counts[tx_size][1]; - const int eob = pd->eobs[block]; + const int eob = p->eobs[block]; const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block); const int ref = mbmi->ref_frame[0] != INTRA_FRAME; - unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = + unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][ref]; const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L; uint8_t *p_tok = x->token_cache; @@ -541,7 +541,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (eob == 0) { // single eob token - cost = token_costs[0][0][pt][DCT_EOB_TOKEN]; + cost = token_costs[0][0][pt][EOB_TOKEN]; c = 0; } else { int band_left = *band_count++; @@ -573,7 +573,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, // eob token if (band_left) { pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[0][pt][DCT_EOB_TOKEN]; + cost += (*token_costs)[0][pt][EOB_TOKEN]; } } @@ -643,7 +643,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): temporarily enabled only for luma component rd = MIN(rd1, rd2); if (plane == 0) - x->zcoeff_blk[tx_size][block] = !xd->plane[plane].eobs[block] || + x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless); args->this_rate += args->rate; @@ -739,7 +739,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, *distortion = rd_stack->this_dist; *rate = rd_stack->this_rate; *sse = rd_stack->this_sse; - *skippable = vp9_is_skippable_in_plane(xd, bsize, plane); + *skippable = vp9_is_skippable_in_plane(x, bsize, plane); } } @@ -1055,7 +1055,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, else x->fwd_txm4x4(src_diff, coeff, 8); - vp9_regular_quantize_b_4x4(x, 4, block, so->scan, so->iscan); + vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors); @@ -1299,11 +1299,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; - // int mode_mask = (bsize <= BLOCK_8X8) - // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask; - - for (mode = DC_PRED; mode <= TM_PRED; mode ++) { - // if (!(mode_mask & (1 << mode))) + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]] & (1 << mode))) continue; @@ -1331,9 +1327,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, struct macroblockd_plane *const pd = x->e_mbd.plane; for (i = 1; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][2]; - p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; - pd[i].eobs = ctx->eobs_pbuf[i][2]; + p[i].eobs = ctx->eobs_pbuf[i][2]; ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0]; ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0]; @@ -1343,14 +1339,13 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, ctx->coeff_pbuf[i][0] = p[i].coeff; ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = pd[i].eobs; + ctx->eobs_pbuf[i][0] = p[i].eobs; } } } } x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected; - return best_rd; } @@ -1546,7 +1541,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, coeff = BLOCK_OFFSET(p->coeff, k); x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), coeff, 8); - vp9_regular_quantize_b_4x4(x, 4, k, so->scan, so->iscan); + vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan); thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); thissse += ssz; @@ -1635,6 +1630,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MODE_INFO *mi = x->e_mbd.mi_8x8[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; + struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; const int label_count = 4; int64_t this_segment_rd = 0; @@ -1963,11 +1959,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0); bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; - bsi->rdstat[i][mode_idx].eobs = pd->eobs[i]; + bsi->rdstat[i][mode_idx].eobs = p->eobs[i]; if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1]; + bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1]; if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2]; + bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2]; } if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { @@ -2065,7 +2061,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; if (has_second_ref(mbmi)) mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; - xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; + x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; mi->bmi[i].as_mode = bsi->modes[i]; } @@ -2075,7 +2071,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, *returntotrate = bsi->r; *returndistortion = bsi->d; *returnyrate = bsi->segment_yrate; - *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0); + *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); *psse = bsi->sse; mbmi->mode = bsi->modes[3]; @@ -3010,7 +3006,7 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - pd[i].eobs = ctx->eobs_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0]; ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0]; @@ -3020,7 +3016,7 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->coeff_pbuf[i][0] = p[i].coeff; ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = pd[i].eobs; + ctx->eobs_pbuf[i][0] = p[i].eobs; } } @@ -3675,7 +3671,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) { - TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); + TX_SIZE uv_tx_size; + *mbmi = best_mbmode; + uv_tx_size = get_uv_tx_size(mbmi); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], @@ -4136,7 +4134,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; - x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i]; + x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; if (switchable_filter_index == 0 && @@ -4423,7 +4421,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) { - TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); + TX_SIZE uv_tx_size; + *mbmi = best_mbmode; + uv_tx_size = get_uv_tx_size(mbmi); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 389ec152a..407041ced 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -27,30 +27,30 @@ static int dct_value_cost[DCT_MAX_VALUE * 2]; const int *vp9_dct_value_cost_ptr; // Array indices are identical to previously-existing CONTEXT_NODE indices -const vp9_tree_index vp9_coef_tree[TREE_SIZE(MAX_ENTROPY_TOKENS)] = { - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ +const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + -EOB_TOKEN, 2, // 0 = EOB + -ZERO_TOKEN, 4, // 1 = ZERO + -ONE_TOKEN, 6, // 2 = ONE + 8, 12, // 3 = LOW_VAL + -TWO_TOKEN, 10, // 4 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 5 = THREE + 14, 16, // 6 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 7 = CAT_ONE + 18, 20, // 8 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 9 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE }; // Unconstrained Node Tree -const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(MAX_ENTROPY_TOKENS)] = { - 2, 6, /* 0 = LOW_VAL */ - -TWO_TOKEN, 4, /* 1 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 2 = THREE */ - 8, 10, /* 3 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 4 = CAT_ONE */ - 12, 14, /* 5 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 6 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 7 = CAT_FIVE */ +const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, // 0 = LOW_VAL + -TWO_TOKEN, 4, // 1 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE + 8, 10, // 3 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE + 12, 14, // 5 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE }; static const vp9_prob Pcat1[] = { 159}; @@ -84,22 +84,22 @@ static void init_bit_trees() { init_bit_tree(cat6, 14); } -const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS] = { +const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = { {0, 0, 0, 0}, // ZERO_TOKEN {0, 0, 0, 1}, // ONE_TOKEN {0, 0, 0, 2}, // TWO_TOKEN {0, 0, 0, 3}, // THREE_TOKEN {0, 0, 0, 4}, // FOUR_TOKEN - {cat1, Pcat1, 1, 5}, // DCT_VAL_CATEGORY1 - {cat2, Pcat2, 2, 7}, // DCT_VAL_CATEGORY2 - {cat3, Pcat3, 3, 11}, // DCT_VAL_CATEGORY3 - {cat4, Pcat4, 4, 19}, // DCT_VAL_CATEGORY4 - {cat5, Pcat5, 5, 35}, // DCT_VAL_CATEGORY5 - {cat6, Pcat6, 14, 67}, // DCT_VAL_CATEGORY6 - {0, 0, 0, 0} // DCT_EOB_TOKEN + {cat1, Pcat1, 1, 5}, // CATEGORY1_TOKEN + {cat2, Pcat2, 2, 7}, // CATEGORY2_TOKEN + {cat3, Pcat3, 3, 11}, // CATEGORY3_TOKEN + {cat4, Pcat4, 4, 19}, // CATEGORY4_TOKEN + {cat5, Pcat5, 5, 35}, // CATEGORY5_TOKEN + {cat6, Pcat6, 14, 67}, // CATEGORY6_TOKEN + {0, 0, 0, 0} // EOB_TOKEN }; -struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS]; +struct vp9_token vp9_coef_encodings[ENTROPY_TOKENS]; void vp9_coef_tree_initialize() { init_bit_trees(); @@ -168,10 +168,11 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; MACROBLOCKD *const xd = args->xd; + struct macroblock_plane *p = &args->cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; int aoff, loff; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); - set_contexts(xd, pd, plane_bsize, tx_size, pd->eobs[block] > 0, aoff, loff); + set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, aoff, loff); } static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -181,16 +182,15 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; uint8_t *token_cache = args->token_cache; + struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; int pt; /* near block/prev token context index */ int c = 0, rc = 0; TOKENEXTRA *t = *tp; /* store tokens starting here */ - const int eob = pd->eobs[block]; + const int eob = p->eobs[block]; const PLANE_TYPE type = pd->plane_type; - struct macroblock_plane *p = &cpi->mb.plane[plane]; const int16_t *qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block); - const int segment_id = mbmi->segment_id; const int16_t *scan, *nb; const scan_order *so; @@ -226,7 +226,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, t->extra = vp9_dct_value_tokens_ptr[v].extra; token = vp9_dct_value_tokens_ptr[v].token; } else { - token = DCT_EOB_TOKEN; + token = EOB_TOKEN; } t->token = token; @@ -249,7 +249,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, } struct is_skippable_args { - MACROBLOCKD *xd; + MACROBLOCK *x; int *skippable; }; @@ -257,21 +257,21 @@ static void is_skippable(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; - args->skippable[0] &= (!args->xd->plane[plane].eobs[block]); + args->skippable[0] &= (!args->x->plane[plane].eobs[block]); } -int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize) { +static int sb_is_skippable(MACROBLOCK *x, BLOCK_SIZE bsize) { int result = 1; - struct is_skippable_args args = {xd, &result}; - foreach_transformed_block(xd, bsize, is_skippable, &args); + struct is_skippable_args args = {x, &result}; + foreach_transformed_block(&x->e_mbd, bsize, is_skippable, &args); return result; } -int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane) { +int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { int result = 1; - struct is_skippable_args args = {xd, &result}; - foreach_transformed_block_in_plane(xd, bsize, plane, is_skippable, &args); + struct is_skippable_args args = {x, &result}; + foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable, + &args); return result; } @@ -286,7 +286,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, SEG_LVL_SKIP); struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache}; - mbmi->skip_coeff = vp9_sb_is_skippable(xd, bsize); + mbmi->skip_coeff = sb_is_skippable(&cpi->mb, bsize); if (mbmi->skip_coeff) { if (!dry_run) cm->counts.mbskip[mb_skip_context][1] += skip_inc; diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 2e3bf5203..67e6c9d3d 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -12,10 +12,14 @@ #define VP9_ENCODER_VP9_TOKENIZE_H_ #include "vp9/common/vp9_entropy.h" + #include "vp9/encoder/vp9_block.h" +#include "vp9/encoder/vp9_treewriter.h" void vp9_tokenize_initialize(); +#define EOSB_TOKEN 127 // Not signalled, encoder only + typedef struct { int16_t token; int16_t extra; @@ -32,9 +36,8 @@ extern const vp9_tree_index vp9_coef_tree[]; extern const vp9_tree_index vp9_coef_con_tree[]; extern struct vp9_token vp9_coef_encodings[]; -int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize); -int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane); +int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); + struct VP9_COMP; void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c index e4aed5374..5b0c17fe7 100644 --- a/vp9/encoder/vp9_treewriter.c +++ b/vp9/encoder/vp9_treewriter.c @@ -36,3 +36,24 @@ void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { costs[-tree[0]] = vp9_cost_bit(probs[0], 0); cost(costs, tree, probs, 2, 0); } + +static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree, + int i, int v, int l) { + v += v; + ++l; + + do { + const vp9_tree_index j = tree[i++]; + if (j <= 0) { + tokens[-j].value = v; + tokens[-j].len = l; + } else { + tree2tok(tokens, tree, j, v, l); + } + } while (++v & 1); +} + +void vp9_tokens_from_tree(struct vp9_token *tokens, + const vp9_tree_index *tree) { + tree2tok(tokens, tree, 0, 0, 0); +} diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h index 3245960ac..94f3eb987 100644 --- a/vp9/encoder/vp9_treewriter.h +++ b/vp9/encoder/vp9_treewriter.h @@ -44,6 +44,14 @@ static INLINE void treed_write(vp9_writer *w, } while (len); } +struct vp9_token { + int value; + int len; +}; + + +void vp9_tokens_from_tree(struct vp9_token*, const vp9_tree_index *); + static INLINE void write_token(vp9_writer *w, vp9_tree tree, const vp9_prob *probs, const struct vp9_token *token) { diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index eefbd1ac9..f6aebff0c 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -17,7 +17,6 @@ VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_convolve.c VP9_COMMON_SRCS-yes += common/vp9_convolve.h VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c -VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h VP9_COMMON_SRCS-yes += common/vp9_entropy.c VP9_COMMON_SRCS-yes += common/vp9_entropymode.c VP9_COMMON_SRCS-yes += common/vp9_entropymv.c diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 9a23ebd53..5d53a4149 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -198,6 +198,10 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); RANGE_CHECK(vp8_cfg, cq_level, 0, 63); + // TODO(yaowu): remove this when ssim tuning is implemented for vp9 + if (vp8_cfg->tuning == VP8_TUNE_SSIM) + ERROR("Option --tune=ssim is not currently supported in VP9."); + if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); |