;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
.globl vp8_subtract_mbuv_ppc
.globl vp8_subtract_mby_ppc
;# r3 short *diff
;# r4 unsigned char *usrc
;# r5 unsigned char *vsrc
;# r6 unsigned char *pred
;# r7 int stride
;#-----------------------------------------------------------------------
;# void vp8_subtract_mbuv_ppc(short *diff, unsigned char *usrc,
;#                            unsigned char *vsrc, unsigned char *pred,
;#                            int stride)
;#
;# Computes diff = src - pred for the two 8x8 chroma (U, V) blocks of a
;# macroblock.  diff is first advanced 512 bytes (2 x 256), i.e. past
;# 256 shorts -- presumably the Y-plane differences; confirm with caller.
;# Each loop iteration handles TWO 8-pixel rows: pred rows are packed
;# 8 bytes each (high half = first row, low half = second row of a
;# 16-byte vector), so pred advances 16 bytes per iteration while src
;# advances by stride per row.  ctr = 4 iterations x 2 rows = 8 rows
;# per plane.  src may be unaligned (lvsl/vperm realigns it); pred and
;# diff are accessed with lvx/stvx and so must be 16-byte aligned --
;# NOTE(review): alignment of pred/diff is assumed, verify at call site.
;#
;# Clobbers: r9, r10, r12, v0-v5, ctr.  Uses v0-v5, so VRSAVE must
;# flag all six vector registers (bits 0-5, MSB-first => 0xfc00).
vp8_subtract_mbuv_ppc:
mfspr r11, 256 ;# get old VRSAVE
oris r12, r11, 0xfc00 ;# mark v0-v5 in use (was 0xf000 = v0-v3 only)
mtspr 256, r12 ;# set VRSAVE
li r9, 256
add r3, r3, r9
add r3, r3, r9 ;# diff += 512 bytes (skip 256 shorts)
add r6, r6, r9 ;# pred += 256 (skip luma predictor block)
li r10, 16 ;# byte offset of the second row's store
li r9, 4
mtctr r9 ;# 4 iterations, 2 rows each => 8 U rows
vspltisw v0, 0 ;# v0 = zero, used to zero-extend bytes
mbu_loop:
lvsl v5, 0, r4 ;# permutate value for alignment
lvx v1, 0, r4 ;# src
lvx v2, 0, r6 ;# pred (two 8-byte rows in one vector)
add r4, r4, r7 ;# src += stride
addi r6, r6, 16 ;# pred += 16 (two packed rows consumed)
vperm v1, v1, v0, v5 ;# realign unaligned src bytes
vmrghb v3, v0, v1 ;# unpack high src to short (zero-extend)
vmrghb v4, v0, v2 ;# unpack high pred to short (row 0)
lvsl v5, 0, r4 ;# permutate value for alignment
lvx v1, 0, r4 ;# src (second row)
add r4, r4, r7 ;# src += stride
vsubshs v3, v3, v4 ;# diff = src - pred (saturating)
stvx v3, 0, r3 ;# store out diff
vperm v1, v1, v0, v5
vmrghb v3, v0, v1 ;# unpack high src to short
vmrglb v4, v0, v2 ;# unpack LOW pred to short (row 1)
vsubshs v3, v3, v4
stvx v3, r10, r3 ;# store out diff
addi r3, r3, 32 ;# diff += 16 shorts (2 rows of 8)
bdnz mbu_loop
mtctr r9 ;# same 8-row loop again for the V plane
mbv_loop:
lvsl v5, 0, r5 ;# permutate value for alignment
lvx v1, 0, r5 ;# src
lvx v2, 0, r6 ;# pred
add r5, r5, r7 ;# src += stride
addi r6, r6, 16
vperm v1, v1, v0, v5
vmrghb v3, v0, v1 ;# unpack high src to short
vmrghb v4, v0, v2 ;# unpack high pred to short (row 0)
lvsl v5, 0, r5 ;# permutate value for alignment
lvx v1, 0, r5 ;# src (second row)
add r5, r5, r7
vsubshs v3, v3, v4
stvx v3, 0, r3 ;# store out diff
vperm v1, v1, v0, v5
vmrghb v3, v0, v1 ;# unpack high src to short
vmrglb v4, v0, v2 ;# unpack LOW pred to short (row 1)
vsubshs v3, v3, v4
stvx v3, r10, r3 ;# store out diff
addi r3, r3, 32
bdnz mbv_loop
mtspr 256, r11 ;# reset old VRSAVE
blr
;# r3 short *diff
;# r4 unsigned char *src
;# r5 unsigned char *pred
;# r6 int stride
;#-----------------------------------------------------------------------
;# void vp8_subtract_mby_ppc(short *diff, unsigned char *src,
;#                           unsigned char *pred, int stride)
;#
;# Computes diff = src - pred for the 16x16 luma block of a macroblock.
;# One iteration per row (ctr = 16): loads 16 src bytes and 16 pred
;# bytes, zero-extends both halves to shorts via merge-with-zero, and
;# stores 16 short differences (32 bytes) per row.
;# Unlike the chroma routine there is no lvsl/vperm realignment, so
;# NOTE(review): src, pred, and diff all appear to be assumed 16-byte
;# aligned here (lvx/stvx ignore the low address bits) -- confirm with
;# callers.  pred rows are packed contiguously at 16 bytes per row.
;#
;# Clobbers: r10, r12, v0-v4, ctr.  VRSAVE 0xf800 flags v0-v4, the
;# five vector registers used.
vp8_subtract_mby_ppc:
mfspr r11, 256 ;# get old VRSAVE
oris r12, r11, 0xf800 ;# mark v0-v4 in use
mtspr 256, r12 ;# set VRSAVE
li r10, 16 ;# byte offset of the row's second store
mtctr r10 ;# 16 rows
vspltisw v0, 0 ;# v0 = zero, used to zero-extend bytes
mby_loop:
lvx v1, 0, r4 ;# src
lvx v2, 0, r5 ;# pred
add r4, r4, r6 ;# src += stride
addi r5, r5, 16 ;# pred += 16 (next packed row)
vmrghb v3, v0, v1 ;# unpack high src to short
vmrghb v4, v0, v2 ;# unpack high pred to short
vsubshs v3, v3, v4 ;# diff = src - pred (saturating)
stvx v3, 0, r3 ;# store out diff
vmrglb v3, v0, v1 ;# unpack low src to short
vmrglb v4, v0, v2 ;# unpack low pred to short
vsubshs v3, v3, v4
stvx v3, r10, r3 ;# store out diff
addi r3, r3, 32 ;# diff += 16 shorts (one full row)
bdnz mby_loop
mtspr 256, r11 ;# reset old VRSAVE
blr