Merge a repeating variable declaration into do_video_encoding.
[libav.git] / libavcodec / ppc / h264_altivec.c
/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "../dsputil.h"

#include "gcc_fixes.h"

#include "dsputil_altivec.h"

#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

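/* h264_template_altivec.c is included twice below: once with OP_U8_ALTIVEC set
 * to PUT_OP_U8_ALTIVEC to generate the put_* functions, and once with
 * AVG_OP_U8_ALTIVEC to generate the avg_* functions. The PREFIX_* macros give
 * each instantiation its own symbol names. */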
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

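/* H264_MC(OPNAME, SIZE, CODETYPE) defines the 16 quarter-pel motion
 * compensation functions OPNAME ## h264_qpel ## SIZE ## _mcXY_ ## CODETYPE,
 * where X and Y are the horizontal and vertical quarter-pel offsets (0..3).
 * For example, H264_MC(put_, 16, altivec) further below generates
 * put_h264_qpel16_mc00_altivec() through put_h264_qpel16_mc33_altivec(),
 * combining the h/v/hv lowpass filters with pixel averaging as needed. */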
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\

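/* put_no_rnd_h264_chroma_mc8_altivec() below implements the H.264 8x8 chroma
 * interpolation: each output pixel is a bilinear blend of four neighbouring
 * source pixels, weighted by A = (8-x)(8-y), B = x(8-y), C = (8-x)y and
 * D = xy, with the bias held in v28ss added before the right shift by 6.
 * AltiVec loads are always 16-byte aligned, so vec_lvsl() permute vectors are
 * used to extract the (possibly unaligned) source rows. */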
/* this code assumes that stride % 16 == 0 */
void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
    signed int ABCD[4] __attribute__((aligned(16))) =
                        {((8 - x) * (8 - y)),
                         ((x) * (8 - y)),
                         ((8 - x) * (y)),
                         ((x) * (y))};
    register int i;
    vector unsigned char fperm;
    const vector signed int vABCD = vec_ld(0, ABCD);
    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
    const vector signed int vzero = vec_splat_s32(0);
    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
    const vector unsigned short v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
    vector unsigned char vsrc0uc, vsrc1uc;
    vector signed short vsrc0ssH, vsrc1ssH;
    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
    vector signed short vsrc2ssH, vsrc3ssH, psum;
    vector unsigned char vdst, ppsum, fsum;

    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
                                          0x14, 0x15, 0x16, 0x17,
                                          0x08, 0x09, 0x0A, 0x0B,
                                          0x0C, 0x0D, 0x0E, 0x0F);
    } else {
        fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                          0x04, 0x05, 0x06, 0x07,
                                          0x18, 0x19, 0x1A, 0x1B,
                                          0x1C, 0x1D, 0x1E, 0x1F);
    }

    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc0uc);
    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc1uc);

    if (!loadSecond) { // -> !reallyBadAlign
        for (i = 0 ; i < h ; i++) {

            vsrcCuc = vec_ld(stride + 0, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sra(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_packsu(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    } else {
        vector unsigned char vsrcDuc;
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (reallyBadAlign)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sr(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_pack(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    }
}

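/* put_pixels16_l2_altivec() averages one 16-byte row of the (possibly
 * unaligned) source src1 with a 16-byte-strided buffer src2 and stores the
 * result to dst. Since vec_st() only stores to aligned addresses, the result
 * is merged with the existing edge bytes via vec_lvsr()/vec_perm() and written
 * back with two aligned stores. */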
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(a, b);

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}

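/* avg_pixels16_l2_altivec() is identical to put_pixels16_l2_altivec() except
 * that the src1/src2 average is additionally averaged with the pixels already
 * in dst, implementing the avg_ store operation. */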
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}

/* Implemented but could be faster
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */

 H264_MC(put_, 16, altivec)
 H264_MC(avg_, 16, altivec)

void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {

#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
        c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;

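        /* The 16 entries of each qpel table correspond to the 16 quarter-pel
         * (x, y) positions: entry x + 4*y selects the mcXY function, e.g.
         * entry 0 is mc00 (full-pel copy) and entry 10 is mc22 (half-pel in
         * both directions). */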
#define dspfunc(PFX, IDX, NUM) \
        c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
        c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
        c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
        c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
        c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
        c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
        c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
        c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
        c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
        c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec

        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc

    } else
#endif /* HAVE_ALTIVEC */
    {
        // Non-AltiVec PPC optimisations

        // ... pending ...
    }
}