ppc: vsx: Implement diff_pixels and get_pixels
[libav.git] / libavcodec / ppc / pixblockdsp.c
CommitLineData
f46bb608
DB
1/*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include "config.h"
24#if HAVE_ALTIVEC_H
25#include <altivec.h>
26#endif
27
28#include "libavutil/attributes.h"
29#include "libavutil/cpu.h"
30#include "libavutil/ppc/cpu.h"
31#include "libavutil/ppc/types_altivec.h"
32#include "libavutil/ppc/util_altivec.h"
33#include "libavcodec/avcodec.h"
34#include "libavcodec/pixblockdsp.h"
35
da60b99a 36#if HAVE_ALTIVEC && HAVE_BIGENDIAN
f46bb608
DB
37
38static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39 int line_size)
40{
41 int i;
7014b659
LB
42 vec_u8 perm = vec_lvsl(0, pixels);
43 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
f46bb608
DB
44
45 for (i = 0; i < 8; i++) {
46 /* Read potentially unaligned pixels.
47 * We're reading 16 pixels, and actually only want 8,
48 * but we simply ignore the extras. */
7014b659
LB
49 vec_u8 pixl = vec_ld(0, pixels);
50 vec_u8 pixr = vec_ld(7, pixels);
51 vec_u8 bytes = vec_perm(pixl, pixr, perm);
f46bb608
DB
52
53 // Convert the bytes into shorts.
7014b659 54 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
f46bb608
DB
55
56 // Save the data to the block, we assume the block is 16-byte aligned.
7014b659 57 vec_st(shorts, i * 16, (vec_s16 *)block);
f46bb608
DB
58
59 pixels += line_size;
60 }
61}
62
63static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
64 const uint8_t *s2, int stride)
65{
66 int i;
7014b659
LB
67 vec_u8 perm1 = vec_lvsl(0, s1);
68 vec_u8 perm2 = vec_lvsl(0, s2);
69 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
70 vec_s16 shorts1, shorts2;
f46bb608
DB
71
72 for (i = 0; i < 4; i++) {
73 /* Read potentially unaligned pixels.
74 * We're reading 16 pixels, and actually only want 8,
75 * but we simply ignore the extras. */
7014b659
LB
76 vec_u8 pixl = vec_ld(0, s1);
77 vec_u8 pixr = vec_ld(15, s1);
78 vec_u8 bytes = vec_perm(pixl, pixr, perm1);
f46bb608
DB
79
80 // Convert the bytes into shorts.
7014b659 81 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
f46bb608
DB
82
83 // Do the same for the second block of pixels.
84 pixl = vec_ld(0, s2);
85 pixr = vec_ld(15, s2);
86 bytes = vec_perm(pixl, pixr, perm2);
87
88 // Convert the bytes into shorts.
7014b659 89 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
f46bb608
DB
90
91 // Do the subtraction.
92 shorts1 = vec_sub(shorts1, shorts2);
93
94 // Save the data to the block, we assume the block is 16-byte aligned.
7014b659 95 vec_st(shorts1, 0, (vec_s16 *)block);
f46bb608
DB
96
97 s1 += stride;
98 s2 += stride;
99 block += 8;
100
101 /* The code below is a copy of the code above...
102 * This is a manual unroll. */
103
104 /* Read potentially unaligned pixels.
105 * We're reading 16 pixels, and actually only want 8,
106 * but we simply ignore the extras. */
107 pixl = vec_ld(0, s1);
108 pixr = vec_ld(15, s1);
109 bytes = vec_perm(pixl, pixr, perm1);
110
111 // Convert the bytes into shorts.
7014b659 112 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
f46bb608
DB
113
114 // Do the same for the second block of pixels.
115 pixl = vec_ld(0, s2);
116 pixr = vec_ld(15, s2);
117 bytes = vec_perm(pixl, pixr, perm2);
118
119 // Convert the bytes into shorts.
7014b659 120 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
f46bb608
DB
121
122 // Do the subtraction.
123 shorts1 = vec_sub(shorts1, shorts2);
124
125 // Save the data to the block, we assume the block is 16-byte aligned.
7014b659 126 vec_st(shorts1, 0, (vec_s16 *)block);
f46bb608
DB
127
128 s1 += stride;
129 s2 += stride;
130 block += 8;
131 }
132}
133
134#endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
135
d0bf20a4
LB
136#if HAVE_VSX
137static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
138 int line_size)
139{
140 int i;
141 for (i = 0; i < 8; i++) {
142 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
143
144 vec_vsx_st(shorts, i * 16, block);
145
146 pixels += line_size;
147 }
148}
149
150static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
151 const uint8_t *s2, int stride)
152{
153 int i;
154 vec_s16 shorts1, shorts2;
155 for (i = 0; i < 8; i++) {
156 shorts1 = vsx_ld_u8_s16(0, s1);
157 shorts2 = vsx_ld_u8_s16(0, s2);
158
159 shorts1 = vec_sub(shorts1, shorts2);
160
161 vec_vsx_st(shorts1, 0, block);
162
163 s1 += stride;
164 s2 += stride;
165 block += 8;
166 }
167}
168#endif /* HAVE_VSX */
169
f46bb608
DB
170av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
171 AVCodecContext *avctx,
172 unsigned high_bit_depth)
173{
da60b99a 174#if HAVE_ALTIVEC && HAVE_BIGENDIAN
f46bb608
DB
175 if (!PPC_ALTIVEC(av_get_cpu_flags()))
176 return;
177
178 c->diff_pixels = diff_pixels_altivec;
179
180 if (!high_bit_depth) {
181 c->get_pixels = get_pixels_altivec;
182 }
183#endif /* HAVE_ALTIVEC */
d0bf20a4
LB
184
185#if HAVE_VSX
186 if (!PPC_VSX(av_get_cpu_flags()))
187 return;
188
189 c->diff_pixels = diff_pixels_vsx;
190
191 if (!high_bit_depth)
192 c->get_pixels = get_pixels_vsx;
193#endif /* HAVE_VSX */
f46bb608 194}