8a50ccb900275a928da6f0123fd067995d9a0f3b
[libav.git] / libavcodec / ppc / dsputil_altivec.c
1 /*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 #include "../dsputil.h"
21 #include "dsputil_altivec.h"
22
23 #if CONFIG_DARWIN
24 #include <sys/sysctl.h>
25 #endif
26
27 int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
28 {
29 int i, s;
30 vector unsigned char perm1, perm2, *pix1v, *pix2v;
31 vector unsigned char t1, t2, t3,t4, t5;
32 vector unsigned int sad, zero;
33 vector signed int sumdiffs;
34
35 zero = (vector unsigned int) (0);
36 sad = (vector unsigned int) (0);
37
38
39 for(i=0;i<16;i++) {
40 /* Read potentially unaligned pixels into t1 and t2 */
41 perm1 = vec_lvsl(0, pix1);
42 pix1v = (vector unsigned char *) pix1;
43 perm2 = vec_lvsl(0, pix2);
44 pix2v = (vector unsigned char *) pix2;
45 t1 = vec_perm(pix1v[0], pix1v[1], perm1);
46 t2 = vec_perm(pix2v[0], pix2v[1], perm2);
47
48 /* Calculate a sum of abs differences vector */
49 t3 = vec_max(t1, t2);
50 t4 = vec_min(t1, t2);
51 t5 = vec_sub(t3, t4);
52
53 /* Add each 4 pixel group together and put 4 results into sad */
54 sad = vec_sum4s(t5, sad);
55
56 pix1 += line_size;
57 pix2 += line_size;
58 }
59
60 /* Sum up the four partial sums, and put the result into s */
61 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
62 sumdiffs = vec_splat(sumdiffs, 3);
63 vec_ste(sumdiffs, 0, &s);
64
65 return s;
66 }
67
68 int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
69 {
70 int i, s;
71 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
72 vector unsigned char t1, t2, t3,t4, t5;
73 vector unsigned int sad, zero;
74 vector signed int sumdiffs;
75
76 zero = (vector unsigned int) (0);
77 sad = (vector unsigned int) (0);
78 permclear = (vector unsigned char) (255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
79
80 for(i=0;i<8;i++) {
81 /* Read potentially unaligned pixels into t1 and t2
82 Since we're reading 16 pixels, and actually only want 8,
83 mask out the last 8 pixels. The 0s don't change the sum. */
84 perm1 = vec_lvsl(0, pix1);
85 pix1v = (vector unsigned char *) pix1;
86 perm2 = vec_lvsl(0, pix2);
87 pix2v = (vector unsigned char *) pix2;
88 t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
89 t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
90
91 /* Calculate a sum of abs differences vector */
92 t3 = vec_max(t1, t2);
93 t4 = vec_min(t1, t2);
94 t5 = vec_sub(t3, t4);
95
96 /* Add each 4 pixel group together and put 4 results into sad */
97 sad = vec_sum4s(t5, sad);
98
99 pix1 += line_size;
100 pix2 += line_size;
101 }
102
103 /* Sum up the four partial sums, and put the result into s */
104 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
105 sumdiffs = vec_splat(sumdiffs, 3);
106 vec_ste(sumdiffs, 0, &s);
107
108 return s;
109 }
110
111 int pix_sum_altivec(UINT8 * pix, int line_size)
112 {
113
114 vector unsigned char perm, *pixv;
115 vector unsigned char t1;
116 vector unsigned int sad, zero;
117 vector signed int sumdiffs;
118
119 int s, i;
120
121 zero = (vector unsigned int) (0);
122 sad = (vector unsigned int) (0);
123
124 for (i = 0; i < 16; i++) {
125 /* Read the potentially unaligned 16 pixels into t1 */
126 perm = vec_lvsl(0, pix);
127 pixv = (vector unsigned char *) pix;
128 t1 = vec_perm(pixv[0], pixv[1], perm);
129
130 /* Add each 4 pixel group together and put 4 results into sad */
131 sad = vec_sum4s(t1, sad);
132
133 pix += line_size;
134 }
135
136 /* Sum up the four partial sums, and put the result into s */
137 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
138 sumdiffs = vec_splat(sumdiffs, 3);
139 vec_ste(sumdiffs, 0, &s);
140
141 return s;
142 }
143
144 void get_pixels_altivec(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
145 {
146 int i;
147 vector unsigned char perm, bytes, *pixv;
148 vector unsigned char zero = (vector unsigned char) (0);
149 vector signed short shorts;
150
151 for(i=0;i<8;i++)
152 {
153 // Read potentially unaligned pixels.
154 // We're reading 16 pixels, and actually only want 8,
155 // but we simply ignore the extras.
156 perm = vec_lvsl(0, pixels);
157 pixv = (vector unsigned char *) pixels;
158 bytes = vec_perm(pixv[0], pixv[1], perm);
159
160 // convert the bytes into shorts
161 shorts = (vector signed short)vec_mergeh(zero, bytes);
162
163 // save the data to the block, we assume the block is 16-byte aligned
164 vec_st(shorts, i*16, (vector signed short*)block);
165
166 pixels += line_size;
167 }
168 }
169
170 void diff_pixels_altivec(DCTELEM *restrict block, const UINT8 *s1,
171 const UINT8 *s2, int stride)
172 {
173 int i;
174 vector unsigned char perm, bytes, *pixv;
175 vector unsigned char zero = (vector unsigned char) (0);
176 vector signed short shorts1, shorts2;
177
178 for(i=0;i<4;i++)
179 {
180 // Read potentially unaligned pixels
181 // We're reading 16 pixels, and actually only want 8,
182 // but we simply ignore the extras.
183 perm = vec_lvsl(0, s1);
184 pixv = (vector unsigned char *) s1;
185 bytes = vec_perm(pixv[0], pixv[1], perm);
186
187 // convert the bytes into shorts
188 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
189
190 // Do the same for the second block of pixels
191 perm = vec_lvsl(0, s2);
192 pixv = (vector unsigned char *) s2;
193 bytes = vec_perm(pixv[0], pixv[1], perm);
194
195 // convert the bytes into shorts
196 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
197
198 // Do the subtraction
199 shorts1 = vec_sub(shorts1, shorts2);
200
201 // save the data to the block, we assume the block is 16-byte aligned
202 vec_st(shorts1, 0, (vector signed short*)block);
203
204 s1 += stride;
205 s2 += stride;
206 block += 8;
207
208
209 // The code below is a copy of the code above... This is a manual
210 // unroll.
211
212 // Read potentially unaligned pixels
213 // We're reading 16 pixels, and actually only want 8,
214 // but we simply ignore the extras.
215 perm = vec_lvsl(0, s1);
216 pixv = (vector unsigned char *) s1;
217 bytes = vec_perm(pixv[0], pixv[1], perm);
218
219 // convert the bytes into shorts
220 shorts1 = (vector signed short)vec_mergeh(zero, bytes);
221
222 // Do the same for the second block of pixels
223 perm = vec_lvsl(0, s2);
224 pixv = (vector unsigned char *) s2;
225 bytes = vec_perm(pixv[0], pixv[1], perm);
226
227 // convert the bytes into shorts
228 shorts2 = (vector signed short)vec_mergeh(zero, bytes);
229
230 // Do the subtraction
231 shorts1 = vec_sub(shorts1, shorts2);
232
233 // save the data to the block, we assume the block is 16-byte aligned
234 vec_st(shorts1, 0, (vector signed short*)block);
235
236 s1 += stride;
237 s2 += stride;
238 block += 8;
239 }
240 }
241
242
/**
 * Report whether a vector unit is available.
 *
 * When CONFIG_DARWIN is enabled, the hw.vectorunit sysctl is queried.
 *
 * @return 1 if the sysctl call succeeds and reports a non-zero value;
 *         0 if it fails, reports zero, or CONFIG_DARWIN is disabled.
 */
int has_altivec(void)
{
#if CONFIG_DARWIN
    int mib[2] = { CTL_HW, HW_VECTORUNIT };
    int vector_unit = 0;
    size_t vector_unit_size = sizeof(vector_unit);

    /* sysctl returns 0 on success; any failure falls through to "no" */
    if (sysctl(mib, 2, &vector_unit, &vector_unit_size, NULL, 0) == 0)
        return vector_unit != 0;
#endif
    return 0;
}
257