/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
38 static void get_pixels_altivec(int16_t *restrict block
, const uint8_t *pixels
,
42 vec_u8 perm
= vec_lvsl(0, pixels
);
43 const vec_u8 zero
= (const vec_u8
)vec_splat_u8(0);
45 for (i
= 0; i
< 8; i
++) {
46 /* Read potentially unaligned pixels.
47 * We're reading 16 pixels, and actually only want 8,
48 * but we simply ignore the extras. */
49 vec_u8 pixl
= vec_ld(0, pixels
);
50 vec_u8 pixr
= vec_ld(7, pixels
);
51 vec_u8 bytes
= vec_perm(pixl
, pixr
, perm
);
53 // Convert the bytes into shorts.
54 vec_s16 shorts
= (vec_s16
)vec_mergeh(zero
, bytes
);
56 // Save the data to the block, we assume the block is 16-byte aligned.
57 vec_st(shorts
, i
* 16, (vec_s16
*)block
);
63 static void diff_pixels_altivec(int16_t *restrict block
, const uint8_t *s1
,
64 const uint8_t *s2
, int stride
)
67 vec_u8 perm1
= vec_lvsl(0, s1
);
68 vec_u8 perm2
= vec_lvsl(0, s2
);
69 const vec_u8 zero
= (const vec_u8
)vec_splat_u8(0);
70 vec_s16 shorts1
, shorts2
;
72 for (i
= 0; i
< 4; i
++) {
73 /* Read potentially unaligned pixels.
74 * We're reading 16 pixels, and actually only want 8,
75 * but we simply ignore the extras. */
76 vec_u8 pixl
= vec_ld(0, s1
);
77 vec_u8 pixr
= vec_ld(15, s1
);
78 vec_u8 bytes
= vec_perm(pixl
, pixr
, perm1
);
80 // Convert the bytes into shorts.
81 shorts1
= (vec_s16
)vec_mergeh(zero
, bytes
);
83 // Do the same for the second block of pixels.
85 pixr
= vec_ld(15, s2
);
86 bytes
= vec_perm(pixl
, pixr
, perm2
);
88 // Convert the bytes into shorts.
89 shorts2
= (vec_s16
)vec_mergeh(zero
, bytes
);
91 // Do the subtraction.
92 shorts1
= vec_sub(shorts1
, shorts2
);
94 // Save the data to the block, we assume the block is 16-byte aligned.
95 vec_st(shorts1
, 0, (vec_s16
*)block
);
101 /* The code below is a copy of the code above...
102 * This is a manual unroll. */
104 /* Read potentially unaligned pixels.
105 * We're reading 16 pixels, and actually only want 8,
106 * but we simply ignore the extras. */
107 pixl
= vec_ld(0, s1
);
108 pixr
= vec_ld(15, s1
);
109 bytes
= vec_perm(pixl
, pixr
, perm1
);
111 // Convert the bytes into shorts.
112 shorts1
= (vec_s16
)vec_mergeh(zero
, bytes
);
114 // Do the same for the second block of pixels.
115 pixl
= vec_ld(0, s2
);
116 pixr
= vec_ld(15, s2
);
117 bytes
= vec_perm(pixl
, pixr
, perm2
);
119 // Convert the bytes into shorts.
120 shorts2
= (vec_s16
)vec_mergeh(zero
, bytes
);
122 // Do the subtraction.
123 shorts1
= vec_sub(shorts1
, shorts2
);
125 // Save the data to the block, we assume the block is 16-byte aligned.
126 vec_st(shorts1
, 0, (vec_s16
*)block
);
134 #endif /* HAVE_ALTIVEC */
136 av_cold
void ff_pixblockdsp_init_ppc(PixblockDSPContext
*c
,
137 AVCodecContext
*avctx
,
138 unsigned high_bit_depth
)
141 if (!PPC_ALTIVEC(av_get_cpu_flags()))
144 c
->diff_pixels
= diff_pixels_altivec
;
146 if (!high_bit_depth
) {
147 c
->get_pixels
= get_pixels_altivec
;
149 #endif /* HAVE_ALTIVEC */