diff options
Diffstat (limited to 'vp8/common')
27 files changed, 720 insertions, 639 deletions
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c index 1e2467411..89a2be825 100644 --- a/vp8/common/arm/arm_systemdependent.c +++ b/vp8/common/arm/arm_systemdependent.c @@ -11,7 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/arm.h" -#include "vp8/common/g_common.h" #include "vp8/common/pragmas.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c index 20a8ac4fc..7cf4bf943 100644 --- a/vp8/common/arm/dequantize_arm.c +++ b/vp8/common/arm/dequantize_arm.c @@ -23,22 +23,20 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); #if HAVE_ARMV7 -void vp8_dequantize_b_neon(BLOCKD *d) +void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; vp8_dequantize_b_loop_neon(Q, DQC, DQ); } #endif #if HAVE_ARMV6 -void vp8_dequantize_b_v6(BLOCKD *d) +void vp8_dequantize_b_v6(BLOCKD *d, short *DQC) { short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; vp8_dequantize_b_loop_v6(Q, DQC, DQ); } diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h deleted file mode 100644 index 6ac3f8b5a..000000000 --- a/vp8/common/bigend.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _bigend_h -#define _bigend_h - -#if defined(__cplusplus) -extern "C" { -#endif - -#define invert2(x) ( (((x)>>8)&0x00ff) | (((x)<<8)&0xff00) ) -#define invert4(x) ( ((invert2(x)&0x0000ffff)<<16) | (invert2((x>>16))&0x0000ffff) ) - -#define high_byte(x) (unsigned char)x -#define mid2Byte(x) (unsigned char)(x >> 8) -#define mid1Byte(x) (unsigned char)(x >> 16) -#define low_byte(x) (unsigned char)(x >> 24) - -#define SWAPENDS 1 - -#if defined(__cplusplus) -} -#endif -#endif diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 91e90e2a6..b237206e6 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -21,9 +21,6 @@ void vpx_log(const char *format, ...); #include "subpixel.h" #include "vpx_ports/mem.h" -#define TRUE 1 -#define FALSE 0 - /*#define DCPRED 1*/ #define DCPREDSIMTHRESH 0 #define DCPREDCNTTHRESH 3 @@ -187,7 +184,6 @@ typedef struct short *qcoeff; short *dqcoeff; unsigned char *predictor; - short *diff; short *dequant; /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ @@ -206,12 +202,16 @@ typedef struct typedef struct MacroBlockD { - DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */ DECLARE_ALIGNED(16, unsigned char, predictor[384]); DECLARE_ALIGNED(16, short, qcoeff[400]); DECLARE_ALIGNED(16, short, dqcoeff[400]); DECLARE_ALIGNED(16, char, eobs[25]); + DECLARE_ALIGNED(16, short, dequant_y1[16]); + DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); + DECLARE_ALIGNED(16, short, dequant_y2[16]); + DECLARE_ALIGNED(16, short, dequant_uv[16]); + /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; int fullpixel_mask; diff --git a/vp8/common/common.h b/vp8/common/common.h index 9a93da991..2cc1c544c 100644 --- a/vp8/common/common.h +++ b/vp8/common/common.h @@ -18,8 +18,6 @@ #include "vpx_mem/vpx_mem.h" -#include "common_types.h" - /* Only need this for fixed-size arrays, for structs just assign. */ #define vp8_copy( Dest, Src) { \ diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h deleted file mode 100644 index 4e6248697..000000000 --- a/vp8/common/common_types.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __INC_COMMON_TYPES -#define __INC_COMMON_TYPES - -#define TRUE 1 -#define FALSE 0 - -#endif diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c index 4a48a3192..96245162f 100644 --- a/vp8/common/dequantize.c +++ b/vp8/common/dequantize.c @@ -14,12 +14,11 @@ #include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" -void vp8_dequantize_b_c(BLOCKD *d) +void vp8_dequantize_b_c(BLOCKD *d, short *DQC) { int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = d->dequant; for (i = 0; i < 16; i++) { diff --git a/vp8/common/dequantize.h b/vp8/common/dequantize.h index f66cf2bac..429359190 100644 --- a/vp8/common/dequantize.h +++ b/vp8/common/dequantize.h @@ -14,7 +14,7 @@ #include "vp8/common/blockd.h" #define prototype_dequant_block(sym) \ - void sym(BLOCKD *x) + void sym(BLOCKD *x, short *DQC) #define prototype_dequant_idct_add(sym) \ void sym(short *input, short *dq, \ diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h deleted file mode 100644 index b923da6e0..000000000 --- a/vp8/common/dma_desc.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _dma_desc_h -#define _dma_desc_h - -#if defined(__cplusplus) -extern "C" { -#endif - - -#define NDSIZE_LG 0x00000900 // Next Descriptor Size -#define NDSIZE_SM 0x00000800 // Next Descriptor Size -#define NDSIZE_7 0x00000700 // Next Descriptor Size -#define NDSIZE_6 0x00000600 // Next Descriptor Size -#define NDSIZE_5 0x00000500 // Next Descriptor Size -#define NDSIZE_4 0x00000400 // Next Descriptor Size -#define NDSIZE_3 0x00000300 // Next Descriptor Size -#define NDSIZE_2 0x00000200 // Next Descriptor Size -#define NDSIZE_1 0x00000100 // Next Descriptor Size - -#define FLOW_STOP 0x0000 -#define FLOW_AUTO 0x1000 -#define FLOW_DESC_AR 0x4000 -#define FLOW_DESC_SM 0x6000 -#define FLOW_DESC_LG 0x7000 - - typedef struct - { - unsigned int ndp; - //unsigned short ndpl; - //unsigned short ndph; - unsigned int sa; - //unsigned short sal; - //unsigned short sah; - - unsigned short dmacfg; - unsigned short xcnt; - unsigned short xmod; - unsigned short ycnt; - unsigned short ymod; - - } LARGE_DESC; - - typedef struct - { - unsigned short ndpl; - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - unsigned short xcnt; - unsigned short xmod; - unsigned short ycnt; - unsigned short ymod; - } SMALL_DESC; - - typedef struct - { - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - unsigned short xcnt; - unsigned short xmod; - unsigned short ycnt; - unsigned short ymod; - } ARRAY_DESC_7; - - typedef struct - { - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - unsigned short xcnt; - unsigned short xmod; - unsigned short ycnt; - } ARRAY_DESC_6; - - typedef struct - { - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - unsigned short xcnt; - unsigned short xmod; - } ARRAY_DESC_5; - - typedef struct - { - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - unsigned short xcnt; - } ARRAY_DESC_4; - - typedef struct - { - unsigned short sal; - unsigned short sah; - unsigned short dmacfg; - } ARRAY_DESC_3; - - typedef struct - { - unsigned short sal; - unsigned short sah; - } ARRAY_DESC_2; - - typedef struct - { - unsigned short sal; - } ARRAY_DESC_1; - -#if defined(__cplusplus) -} -#endif - -#endif //_dma_desc_h diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h deleted file mode 100644 index 43daa65bc..000000000 --- a/vp8/common/duck_io.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _duck_io_h -#define _duck_io_h - -#if defined(__cplusplus) -extern "C" { -#endif - -#if defined (_WIN32) - typedef __int64 int64_t; -#elif defined(__MWERKS__) - typedef long long int64_t; -#elif defined(__APPLE__) || defined(__POWERPC) -#include <ppc/types.h> -#else - typedef long long int64_t; -#endif - - typedef struct - { - int64_t offset; // offset to start from - int blocking; // non-zero for blocking - } re_open_t; - - - typedef enum - { - SAL_ERR_MAX = -10, - SAL_ERROR = -11, // Default error - SAL_ERR_WSASTARTUP = -12, - SAL_ERR_SOCKET_CREATE = -13, - SAL_ERR_RESOLVING_HOSTNAME = -14, - SAL_ERR_SERVER_CONNECTION = -15, - SAL_ERR_SENDING_DATA = -16, - SAL_ERR_RECEIVING_DATA = -17, - SAL_ERR_404_FILE_NOT_FOUND = -18, - SAL_ERR_PARSING_HTTP_HEADER = -19, - SAL_ERR_PARSING_CONTENT_LEN = -20, - SAL_ERR_CONNECTION_TIMEOUT = -21, - SAL_ERR_FILE_OPEN_FAILED = -22, - SAL_ERR_MIN = -23 - } SAL_ERR; /* EMH 1-15-03 */ - - - typedef struct sal_err_map_temp - { - SAL_ERR code; - const char *decode; - - } sal_err_map_t; - - - static char *sal_err_text(SAL_ERR e) - { - int t; - const sal_err_map_t g_sal_err_map[] = - { - { SAL_ERR_WSASTARTUP, "Error with WSAStartup" }, - { SAL_ERR_SOCKET_CREATE, "Error creating socket" }, - { SAL_ERR_RESOLVING_HOSTNAME, "Error resolving hostname" }, - { SAL_ERR_SERVER_CONNECTION, "Error connecting to server" }, - { SAL_ERR_SENDING_DATA, "Error sending data" }, - { SAL_ERR_RECEIVING_DATA, "Error receiving data" }, - { SAL_ERR_404_FILE_NOT_FOUND, "Error file not found " }, - { SAL_ERR_PARSING_HTTP_HEADER, "Error parsing http header" }, - { SAL_ERR_PARSING_CONTENT_LEN, "Error parsing content length" }, - { SAL_ERR_CONNECTION_TIMEOUT, "Error Connection timed out" }, - { SAL_ERR_FILE_OPEN_FAILED, "Error opening file" } - }; - - for (t = 0; t < sizeof(g_sal_err_map) / sizeof(sal_err_map_t); t++) - { - if (e == g_sal_err_map[t].code) - return (char *) g_sal_err_map[t].decode; - } - - return 0; - } - - - - - - - - int duck_open(const char *fname, unsigned long user_data); - - void duck_close(int ghndl); - - int duck_read(int ghndl, unsigned char *buf, int nbytes); - - int64_t duck_seek(int g_hndl, int64_t offs, int origin); - - int duck_read_finished(int han, int flag); /* FWG 7-9-99 */ - - int duck_name(int handle, char name[], size_t max_len); /* EMH 9-23-03 */ - - int duck_read_blocking(int handle, unsigned char *buffer, int bytes); /* EMH 9-23-03 */ - - int64_t duck_available_data(int handle); /* EMH 10-23-03 */ - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h index 01909b937..a3443d765 100644 --- a/vp8/common/findnearmv.h +++ b/vp8/common/findnearmv.h @@ -60,10 +60,10 @@ static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, int mb_to_bottom_edge) { unsigned int need_to_clamp; - need_to_clamp = (mv->as_mv.col < mb_to_left_edge) ? 1 : 0; - need_to_clamp |= (mv->as_mv.col > mb_to_right_edge) ? 1 : 0; - need_to_clamp |= (mv->as_mv.row < mb_to_top_edge) ? 1 : 0; - need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge) ? 1 : 0; + need_to_clamp = (mv->as_mv.col < mb_to_left_edge); + need_to_clamp |= (mv->as_mv.col > mb_to_right_edge); + need_to_clamp |= (mv->as_mv.row < mb_to_top_edge); + need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge); return need_to_clamp; } diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h deleted file mode 100644 index 5f523980b..000000000 --- a/vp8/common/g_common.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -extern void (*vp8_clear_system_state)(void); -extern void (*vp8_plane_add_noise)(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int DPitch, int q); -extern void (*de_interlace) -( - unsigned char *src_ptr, - unsigned char *dst_ptr, - int Width, - int Height, - int Stride -); diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index dbf8d6504..01d76206d 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -10,7 +10,6 @@ #include "vpx_config.h" -#include "vp8/common/g_common.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" #include "vp8/common/recon.h" diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 7eec58e26..f49e2e577 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -17,6 +17,10 @@ #include "blockd.h" #include "onyxc_int.h" +#if CONFIG_MULTITHREAD +#include "vpx_mem/vpx_mem.h" +#endif + static void eob_adjust(char *eobs, short *diff) { /* eob adjust.... the idct can only skip if both the dc and eob are zero */ @@ -32,9 +36,7 @@ static void eob_adjust(char *eobs, short *diff) static void vp8_inverse_transform_mby(MACROBLOCKD *xd, const VP8_COMMON_RTCD *rtcd) { - short *DQC = xd->block[0].dequant; - /* save the dc dequant constant in case it is overridden */ - short dc_dequant_temp = DQC[0]; + short *DQC = xd->dequant_y1; if (xd->mode_info_context->mbmi.mode != SPLITMV) { @@ -51,15 +53,11 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd, } eob_adjust(xd->eobs, xd->qcoeff); - /* override the dc dequant constant */ - DQC[0] = 1; + DQC = xd->dequant_y1_dc; } DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, + (xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - - /* restore the dc dequant constant */ - DQC[0] = dc_dequant_temp; } #endif diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h deleted file mode 100644 index 99df1164c..000000000 --- a/vp8/common/littlend.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _littlend_h -#define _littlend_h - -#if defined(__cplusplus) -extern "C" { -#endif - -#define invert2(x) (x) -#define invert4(x) (x) - -#define low_byte(x) (unsigned char)x -#define mid1Byte(x) (unsigned char)(x >> 8) -#define mid2Byte(x) (unsigned char)(x >> 16) -#define high_byte(x) (unsigned char)(x >> 24) - -#define SWAPENDS 0 - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c index 11fa3ffa7..f8971d754 100644 --- a/vp8/common/mbpitch.c +++ b/vp8/common/mbpitch.c @@ -87,7 +87,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 4; c++) { - x->block[r*4+c].diff = &x->diff[r * 4 * 16 + c * 4]; x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4; } } @@ -96,7 +95,6 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 2; c++) { - x->block[16+r*2+c].diff = &x->diff[256 + r * 4 * 8 + c * 4]; x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4; } @@ -106,14 +104,11 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) { for (c = 0; c < 2; c++) { - x->block[20+r*2+c].diff = &x->diff[320+ r * 4 * 8 + c * 4]; x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4; } } - x->block[24].diff = &x->diff[384]; - for (r = 0; r < 25; r++) { x->block[r].qcoeff = x->qcoeff + r * 16; diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 37fa5a0cd..d17a32b82 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -22,9 +22,9 @@ extern "C" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "vpx_scale/yv12config.h" -#include "type_aliases.h" #include "ppflags.h" - typedef int *VP8_PTR; + + struct VP8_COMP; /* Create/destroy static data structures. */ @@ -226,27 +226,27 @@ extern "C" void vp8_initialize(); - VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf); - void vp8_remove_compressor(VP8_PTR *comp); + struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf); + void vp8_remove_compressor(struct VP8_COMP* *comp); - void vp8_init_config(VP8_PTR onyx, VP8_CONFIG *oxcf); - void vp8_change_config(VP8_PTR onyx, VP8_CONFIG *oxcf); + void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); + void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf); // receive a frames worth of data caller can assume that a copy of this frame is made // and not just a copy of the pointer.. - int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); - int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); - int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); - - int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags); - int vp8_update_reference(VP8_PTR comp, int ref_frame_flags); - int vp8_get_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - int vp8_set_reference(VP8_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - int vp8_update_entropy(VP8_PTR comp, int update); - int vp8_set_roimap(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); - int vp8_set_active_map(VP8_PTR comp, unsigned char *map, unsigned int rows, unsigned int cols); - int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); - int vp8_get_quantizer(VP8_PTR c); + int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp); + int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); + int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); + + int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags); + int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags); + int vp8_get_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp8_set_reference(struct VP8_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + int vp8_update_entropy(struct VP8_COMP* comp, int update); + int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); + int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols); + int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); + int vp8_get_quantizer(struct VP8_COMP* c); #ifdef __cplusplus } diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index f733ff774..77a0ca2cc 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -93,9 +93,9 @@ typedef struct VP8Common { struct vpx_internal_error_info error; - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); int Width; int Height; diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h index 43fa00bd3..35a8b6e55 100644 --- a/vp8/common/onyxd.h +++ b/vp8/common/onyxd.h @@ -18,13 +18,13 @@ extern "C" { #endif -#include "type_aliases.h" #include "vpx_scale/yv12config.h" #include "ppflags.h" #include "vpx_ports/mem.h" #include "vpx/vpx_codec.h" - typedef void *VP8D_PTR; + struct VP8D_COMP; + typedef struct { int Width; @@ -49,19 +49,19 @@ extern "C" void vp8dx_initialize(void); - void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x); + void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x); - int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst); + int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); - int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); + int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, unsigned long size, const unsigned char *dest, int64_t time_stamp); + int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); - VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf); + struct VP8D_COMP* vp8dx_create_decompressor(VP8D_CONFIG *oxcf); - void vp8dx_remove_decompressor(VP8D_PTR comp); + void vp8dx_remove_decompressor(struct VP8D_COMP* comp); #ifdef __cplusplus } diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c index 1f5d79068..7046a63e8 100644 --- a/vp8/common/ppc/systemdependent.c +++ b/vp8/common/ppc/systemdependent.c @@ -9,7 +9,6 @@ */ -#include "g_common.h" #include "subpixel.h" #include "loopfilter.h" #include "recon.h" diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h deleted file mode 100644 index 22b531a76..000000000 --- a/vp8/common/type_aliases.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : type_aliases.h -* -* Description : Standard type aliases -* -****************************************************************************/ -#ifndef __INC_TYPE_ALIASES_H -#define __INC_TYPE_ALIASES_H - -/**************************************************************************** -* Macros -****************************************************************************/ -#define EXPORT -#define IMPORT extern /* Used to declare imported data & routines */ -#define PRIVATE static /* Used to declare & define module-local data */ -#define LOCAL static /* Used to define all persistent routine-local data */ -#define STD_IN_PATH 0 /* Standard input path */ -#define STD_OUT_PATH 1 /* Standard output path */ -#define STD_ERR_PATH 2 /* Standard error path */ -#define STD_IN_FILE stdin /* Standard input file pointer */ -#define STD_OUT_FILE stdout /* Standard output file pointer */ -#define STD_ERR_FILE stderr /* Standard error file pointer */ -#define max_int 0x7FFFFFFF - -#define __export -#define _export - -#define CCONV - -#ifndef NULL -#ifdef __cplusplus -#define NULL 0 -#else -#define NULL ((void *)0) -#endif -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -/**************************************************************************** -* Typedefs -****************************************************************************/ -#ifndef TYPE_INT8 -#define TYPE_INT8 -typedef signed char INT8; -#endif - -#ifndef TYPE_INT16 -/*#define TYPE_INT16*/ -typedef signed short INT16; -#endif - -#ifndef TYPE_INT32 -/*#define TYPE_INT32*/ -typedef signed int INT32; -#endif - -#ifndef TYPE_UINT8 -/*#define TYPE_UINT8*/ -typedef unsigned char UINT8; -#endif - -#ifndef TYPE_UINT32 -/*#define TYPE_UINT32*/ -typedef unsigned int UINT32; -#endif - -#ifndef TYPE_UINT16 -/*#define TYPE_UINT16*/ -typedef unsigned short UINT16; -#endif - -#ifndef TYPE_BOOL -/*#define TYPE_BOOL*/ -typedef int BOOL; -#endif - -typedef unsigned char BOOLEAN; - -#ifdef _MSC_VER -typedef __int64 INT64; -#else - -#ifndef TYPE_INT64 -#ifdef _TMS320C6X -/* for now we only have 40bits */ -typedef long INT64; -#else -typedef long long INT64; -#endif -#endif - -#endif - -/* Floating point */ -typedef double FLOAT64; -typedef float FLOAT32; - -#endif diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c index 49cebd6f5..8ff483708 100644 --- a/vp8/common/x86/idct_blk_mmx.c +++ b/vp8/common/x86/idct_blk_mmx.c @@ -14,12 +14,12 @@ extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); -void vp8_dequantize_b_mmx(BLOCKD *d) +void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) { short *sq = (short *) d->qcoeff; short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp8_dequantize_b_impl_mmx(sq, dq, q); + + vp8_dequantize_b_impl_mmx(sq, dq, DQC); } void vp8_dequant_idct_add_y_block_mmx diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 86927d9f1..2ad010adb 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -1385,52 +1385,54 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx - push rsi - push rdi ; end prolog - mov rsi, arg(0) ;src_ptr + mov rcx, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - mov rdx, arg(2) ;blimit - movdqa xmm3, XMMWORD PTR [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax + lea rdx, [rcx + rax] neg rax ; calculate mask - movdqa xmm1, [rsi+2*rax] ; p1 - movdqa xmm0, [rdi] ; q1 + movdqa xmm0, [rdx] ; q1 + mov rdx, arg(2) ;blimit + movdqa xmm1, [rcx+2*rax] ; p1 + movdqa xmm2, xmm1 movdqa xmm7, xmm0 - movdqa xmm4, xmm0 + psubusb xmm0, xmm1 ; q1-=p1 - psubusb xmm1, xmm4 ; p1-=q1 + psubusb xmm1, xmm7 ; p1-=q1 por xmm1, xmm0 ; abs(p1-q1) pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm1, 1 ; abs(p1-q1)/2 - movdqa xmm5, [rsi+rax] ; p0 - movdqa xmm4, [rsi] ; q0 + movdqa xmm3, XMMWORD PTR [rdx] + + movdqa xmm5, [rcx+rax] ; p0 + movdqa xmm4, [rcx] ; q0 movdqa xmm0, xmm4 ; q0 movdqa xmm6, xmm5 ; p0 psubusb xmm5, xmm4 ; p0-=q0 psubusb xmm4, xmm6 ; q0-=p0 por xmm5, xmm4 ; abs(p0 - q0) + + movdqa xmm4, [GLOBAL(t80)] + paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm3, xmm3 pcmpeqb xmm5, xmm3 + ; start work on filters - pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + pxor xmm2, xmm4 ; p1 offset to convert to signed values + pxor xmm7, xmm4 ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm6, xmm4 ; offset to convert to signed values + pxor xmm0, xmm4 ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1438,42 +1440,36 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) pand xmm5, xmm2 ; mask filter values we don't care about - ; do + 4 side - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - movdqa xmm1, xmm5 ; get a copy of filters - psraw xmm1, 11 ; arithmetic shift right 11 - psllw xmm1, 8 ; shift left 8 to put it back - - por xmm0, xmm1 ; put the two together to get result + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 + movdqa xmm0, xmm5 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [GLOBAL(t80)] ; unoffset - movdqa [rsi], xmm3 ; write back + movdqa xmm1, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] - ; now do +3 side - psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm6, xmm5 ; p0+= p0 add + pxor xmm3, xmm4 ; unoffset + movdqa [rcx], xmm3 ; write back - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset - movdqa [rsi+rax], xmm6 ; write back + pxor xmm6, xmm4 ; unoffset + movdqa [rcx+rax], xmm6 ; write back ; begin epilog - pop rdi - pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS @@ -1536,9 +1532,6 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - movdqa t0, xmm0 ; save to t0 - movdqa t1, xmm2 ; save to t1 - lea rsi, [rsi + rax*8] lea rdi, [rsi + rax] lea rdx, [rsi + rax*4] @@ -1551,26 +1544,24 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 - movd xmm0, [rsi + rax*2] ; a3 a2 a1 a0 + movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 - movd xmm2, [rdi + rax*2] ; b3 b2 b1 b0 + movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 - punpckldq xmm0, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 - punpckldq xmm2, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 + punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 + punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 - punpcklbw xmm0, xmm2 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 + punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 - movdqa xmm1, xmm4 - punpcklwd xmm4, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm1, xmm0 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 + movdqa xmm7, xmm4 + punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 + punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 movdqa xmm6, xmm4 - punpckldq xmm4, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - punpckhdq xmm6, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 + punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 + punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - movdqa xmm0, t0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - movdqa xmm2, t1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 movdqa xmm1, xmm0 movdqa xmm3, xmm2 @@ -1579,6 +1570,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + mov rdx, arg(2) ;blimit + ; calculate mask movdqa xmm6, xmm0 ; p1 movdqa xmm7, xmm3 ; q1 @@ -1588,6 +1581,8 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm6, 1 ; abs(p1-q1)/2 + movdqa xmm7, [rdx] + movdqa xmm5, xmm1 ; p0 movdqa xmm4, xmm2 ; q0 psubusb xmm5, xmm2 ; p0-=q0 @@ -1596,8 +1591,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;blimit - movdqa xmm7, XMMWORD PTR [rdx] + movdqa xmm4, [GLOBAL(t80)] psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm7, xmm7 @@ -1607,59 +1601,48 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): movdqa t0, xmm0 movdqa t1, xmm3 - pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - + pxor xmm0, xmm4 ; p1 offset to convert to signed values + pxor xmm3, xmm4 ; q1 offset to convert to signed values psubsb xmm0, xmm3 ; p1 - q1 - movdqa xmm6, xmm1 ; p0 - - movdqa xmm7, xmm2 ; q0 - pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values - movdqa xmm3, xmm7 ; offseted ; q0 - - psubsb xmm7, xmm6 ; q0 - p0 - paddsb xmm0, xmm7 ; p1 - q1 + 1 * (q0 - p0) + movdqa xmm6, xmm1 ; p0 +; movdqa xmm7, xmm2 ; q0 - paddsb xmm0, xmm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm0, xmm7 ; p1 - q1 + 3 * (q0 - p0) + pxor xmm6, xmm4 ; offset to convert to signed values + pxor xmm2, xmm4 ; offset to convert to signed values + movdqa xmm3, xmm2 ; offseted ; q0 + psubsb xmm2, xmm6 ; q0 - p0 + paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0) pand xmm5, xmm0 ; mask filter values we don't care about - paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - - psraw xmm0, 3 ; arithmetic shift right 11 - psrlw xmm0, 8 - - movdqa xmm7, xmm5 ; get a copy of filters - psraw xmm7, 11 ; arithmetic shift right 11 - - psllw xmm7, 8 ; shift left 8 to put it back - por xmm0, xmm7 ; put the two together to get result - - psubsb xmm3, xmm0 ; q0-= q0sz add - pxor xmm3, [GLOBAL(t80)] ; unoffset q0 - - ; now do +3 side + movdqa xmm0, xmm5 psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 - movdqa xmm0, xmm5 ; get a copy of filters - psllw xmm0, 8 ; shift left 8 - psraw xmm0, 3 ; arithmetic shift right 11 + movdqa xmm1, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] - psrlw xmm0, 8 - psraw xmm5, 11 ; arithmetic shift right 11 + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add - psllw xmm5, 8 ; shift left 8 to put it back - por xmm0, xmm5 ; put the two together to get result + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm6, xmm5 ; p0+= p0 add - paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [GLOBAL(t80)] ; unoffset p0 + pxor xmm3, xmm4 ; unoffset q0 + pxor xmm6, xmm4 ; unoffset p0 movdqa xmm0, t0 ; p1 movdqa xmm4, t1 ; q1 @@ -1763,3 +1746,9 @@ s9: align 16 s63: times 8 dw 0x003f +align 16 +te0: + times 16 db 0xe0 +align 16 +t1f: + times 16 db 0x1f diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index a82c1b4fd..4b68ef5f2 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -559,12 +559,492 @@ sym(vp8_intra_pred_uv_ho_%1): vp8_intra_pred_uv_ho mmx2 vp8_intra_pred_uv_ho ssse3 +;void vp8_intra_pred_y_dc_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dc_sse2) +sym(vp8_intra_pred_y_dc_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; from top + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rsi, rax + pxor xmm0, xmm0 + movdqa xmm1, [rsi] + psadbw xmm1, xmm0 + movq xmm2, xmm1 + punpckhqdq xmm1, xmm1 + paddw xmm1, xmm2 + + ; from left + dec rsi + lea rdi, [rax*3] + movzx ecx, byte [rsi+rax] + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + movzx edx, byte [rsi+rax*4] + add ecx, edx + + ; add up + pextrw edx, xmm1, 0x0 + lea edx, [edx+ecx+16] + sar edx, 5 + movd xmm1, edx + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + dec rsi + jnz .label + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dctop_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dctop_sse2) +sym(vp8_intra_pred_y_dctop_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + GET_GOT rbx + ; end prolog + + ; from top + mov rcx, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rcx, rax + pxor xmm0, xmm0 + movdqa xmm1, [rcx] + psadbw xmm1, xmm0 + movdqa xmm2, xmm1 + punpckhqdq xmm1, xmm1 + paddw xmm1, xmm2 + + ; add up + paddw xmm1, [GLOBAL(dc_8)] + psraw xmm1, 4 + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdx, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdx ], xmm1 + movdqa [rdx+rcx ], xmm1 + movdqa [rdx+rcx*2], xmm1 + movdqa [rdx+rax ], xmm1 + lea rdx, [rdx+rcx*4] + movdqa [rdx ], xmm1 + movdqa [rdx+rcx ], xmm1 + movdqa [rdx+rcx*2], xmm1 + movdqa [rdx+rax ], xmm1 + lea rdx, [rdx+rcx*4] + dec rsi + jnz .label + + ; begin epilog + RESTORE_GOT + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dcleft_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dcleft_sse2) +sym(vp8_intra_pred_y_dcleft_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; from left + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + dec rsi + lea rdi, [rax*3] + movzx ecx, byte [rsi] + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + add ecx, edx + lea rsi, [rsi+rax*4] + movzx edx, byte [rsi] + add ecx, edx + movzx edx, byte [rsi+rax] + add ecx, edx + movzx edx, byte [rsi+rax*2] + add ecx, edx + movzx edx, byte [rsi+rdi] + lea edx, [ecx+edx+8] + + ; add up + shr edx, 4 + movd xmm1, edx + ; FIXME use pshufb for ssse3 version + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm1, xmm1 + packuswb xmm1, xmm1 + + ; write out + mov rsi, 2 + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + lea rax, [rcx*3] + +.label + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + movdqa [rdi ], xmm1 + movdqa [rdi+rcx ], xmm1 + movdqa [rdi+rcx*2], xmm1 + movdqa [rdi+rax ], xmm1 + lea rdi, [rdi+rcx*4] + dec rsi + jnz .label + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_dc128_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_dc128_sse2) +sym(vp8_intra_pred_y_dc128_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + GET_GOT rbx + ; end prolog + + ; write out + mov rsi, 2 + movdqa xmm1, [GLOBAL(dc_128)] + mov rax, arg(0) ;dst; + movsxd rdx, dword ptr arg(1) ;dst_stride + lea rcx, [rdx*3] + +.label + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + dec rsi + jnz .label + + ; begin epilog + RESTORE_GOT + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_tm_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +%macro vp8_intra_pred_y_tm 1 +global sym(vp8_intra_pred_y_tm_%1) +sym(vp8_intra_pred_y_tm_%1): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + GET_GOT rbx + ; end prolog + + ; read top row + mov edx, 8 + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + sub rsi, rax + pxor xmm0, xmm0 +%ifidn %1, ssse3 + movdqa xmm3, [GLOBAL(dc_1024)] +%endif + movdqa xmm1, [rsi] + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm0 + punpckhbw xmm2, xmm0 + + ; set up left ptrs ans subtract topleft + movd xmm4, [rsi-1] + lea rsi, [rsi+rax-1] +%ifidn %1, sse2 + punpcklbw xmm4, xmm0 + pshuflw xmm4, xmm4, 0x0 + punpcklqdq xmm4, xmm4 +%else + pshufb xmm4, xmm3 +%endif + psubw xmm1, xmm4 + psubw xmm2, xmm4 + + ; set up dest ptrs + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride +vp8_intra_pred_y_tm_%1_loop: + movd xmm4, [rsi] + movd xmm5, [rsi+rax] +%ifidn %1, sse2 + punpcklbw xmm4, xmm0 + punpcklbw xmm5, xmm0 + pshuflw xmm4, xmm4, 0x0 + pshuflw xmm5, xmm5, 0x0 + punpcklqdq xmm4, xmm4 + punpcklqdq xmm5, xmm5 +%else + pshufb xmm4, xmm3 + pshufb xmm5, xmm3 +%endif + movdqa xmm6, xmm4 + movdqa xmm7, xmm5 + paddw xmm4, xmm1 + paddw xmm6, xmm2 + paddw xmm5, xmm1 + paddw xmm7, xmm2 + packuswb xmm4, xmm6 + packuswb xmm5, xmm7 + movdqa [rdi ], xmm4 + movdqa [rdi+rcx], xmm5 + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rcx*2] + dec edx + jnz vp8_intra_pred_y_tm_%1_loop + + ; begin epilog + RESTORE_GOT + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret +%endmacro + +vp8_intra_pred_y_tm sse2 +vp8_intra_pred_y_tm ssse3 + +;void vp8_intra_pred_y_ve_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_ve_sse2) +sym(vp8_intra_pred_y_ve_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + ; end prolog + + ; read from top + mov rax, arg(2) ;src; + movsxd rdx, dword ptr arg(3) ;src_stride; + sub rax, rdx + movdqa xmm1, [rax] + + ; write out + mov rsi, 2 + mov rax, arg(0) ;dst; + movsxd rdx, dword ptr arg(1) ;dst_stride + lea rcx, [rdx*3] + +.label + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + movdqa [rax ], xmm1 + movdqa [rax+rdx ], xmm1 + movdqa [rax+rdx*2], xmm1 + movdqa [rax+rcx ], xmm1 + lea rax, [rax+rdx*4] + dec rsi + jnz .label + + ; begin epilog + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_intra_pred_y_ho_sse2( +; unsigned char *dst, +; int dst_stride +; unsigned char *src, +; int src_stride, +; ) +global sym(vp8_intra_pred_y_ho_sse2) +sym(vp8_intra_pred_y_ho_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + ; read from left and write out + mov edx, 8 + mov rsi, arg(2) ;src; + movsxd rax, dword ptr arg(3) ;src_stride; + mov rdi, arg(0) ;dst; + movsxd rcx, dword ptr arg(1) ;dst_stride + dec rsi + +vp8_intra_pred_y_ho_sse2_loop: + movd xmm0, [rsi] + movd xmm1, [rsi+rax] + ; FIXME use pshufb for ssse3 version + punpcklbw xmm0, xmm0 + punpcklbw xmm1, xmm1 + pshuflw xmm0, xmm0, 0x0 + pshuflw xmm1, xmm1, 0x0 + punpcklqdq xmm0, xmm0 + punpcklqdq xmm1, xmm1 + movdqa [rdi ], xmm0 + movdqa [rdi+rcx], xmm1 + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rcx*2] + dec edx + jnz vp8_intra_pred_y_ho_sse2_loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + SECTION_RODATA +align 16 dc_128: - times 8 db 128 + times 16 db 128 dc_4: times 4 dw 4 align 16 +dc_8: + times 8 dw 8 +align 16 dc_1024: times 8 dw 0x400 align 16 diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c index fcc75a901..44221cd0b 100644 --- a/vp8/common/x86/recon_wrapper_sse2.c +++ b/vp8/common/x86/recon_wrapper_sse2.c @@ -94,3 +94,69 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x) vp8_intra_pred_uv_tm_ssse3, vp8_intra_pred_uv_ho_ssse3); } + +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dctop_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dcleft_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_dc128_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ho_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_ve_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_sse2); +extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_y_tm_ssse3); + +static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_stride, + build_intra_predictors_mbuv_fn_t tm_func) +{ + int mode = x->mode_info_context->mbmi.mode; + build_intra_predictors_mbuv_fn_t fn; + int src_stride = x->dst.y_stride; + switch (mode) { + case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break; + case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break; + case TM_PRED: fn = tm_func; break; + case DC_PRED: + if (x->up_available) { + if (x->left_available) { + fn = vp8_intra_pred_y_dc_sse2; break; + } else { + fn = vp8_intra_pred_y_dctop_sse2; break; + } + } else if (x->left_available) { + fn = vp8_intra_pred_y_dcleft_sse2; break; + } else { + fn = vp8_intra_pred_y_dc128_sse2; break; + } + break; + default: return; + } + + fn(dst_y, dst_stride, x->dst.y_buffer, src_stride); + return; +} + +void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->predictor, 16, + vp8_intra_pred_y_tm_ssse3); +} + +void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_sse2); +} + +void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x) +{ + vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride, + vp8_intra_pred_y_tm_ssse3); + +} diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h index fbb3dcb63..afacc60d1 100644 --- a/vp8/common/x86/recon_x86.h +++ b/vp8/common/x86/recon_x86.h @@ -42,6 +42,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx); extern prototype_copy_block(vp8_copy_mem16x16_sse2); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_sse2); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_sse2); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_copy16x16 @@ -53,12 +55,20 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2); #undef vp8_recon_build_intra_predictors_mbuv_s #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2 +#undef vp8_recon_build_intra_predictors_mby +#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_sse2 + +#undef vp8_recon_build_intra_predictors_mby_s +#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_sse2 + #endif #endif #if HAVE_SSSE3 extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3); extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_ssse3); +extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_ssse3); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_build_intra_predictors_mbuv @@ -67,6 +77,12 @@ extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3) #undef vp8_recon_build_intra_predictors_mbuv_s #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3 +#undef vp8_recon_build_intra_predictors_mby +#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_ssse3 + +#undef vp8_recon_build_intra_predictors_mby_s +#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3 + #endif #endif #endif diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c index ad3a1f76b..e1e1b7987 100644 --- a/vp8/common/x86/x86_systemdependent.c +++ b/vp8/common/x86/x86_systemdependent.c @@ -11,7 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/x86.h" -#include "vp8/common/g_common.h" #include "vp8/common/subpixel.h" #include "vp8/common/loopfilter.h" #include "vp8/common/recon.h" @@ -86,6 +85,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) vp8_build_intra_predictors_mbuv_sse2; rtcd->recon.build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; + rtcd->recon.build_intra_predictors_mby = + vp8_build_intra_predictors_mby_sse2; + rtcd->recon.build_intra_predictors_mby_s = + vp8_build_intra_predictors_mby_s_sse2; rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; @@ -132,6 +135,10 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) vp8_build_intra_predictors_mbuv_ssse3; rtcd->recon.build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; + rtcd->recon.build_intra_predictors_mby = + vp8_build_intra_predictors_mby_ssse3; + rtcd->recon.build_intra_predictors_mby_s = + vp8_build_intra_predictors_mby_s_ssse3; } #endif |