2c28e29d3f596184abb9d1d7b9bdfd163e9327c6
[libav.git] / libavcodec / ppc / pixblockdsp.c
1 /*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
27
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
35
36 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
37
38 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39 int line_size)
40 {
41 int i;
42 vec_u8 perm = vec_lvsl(0, pixels);
43 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
44
45 for (i = 0; i < 8; i++) {
46 /* Read potentially unaligned pixels.
47 * We're reading 16 pixels, and actually only want 8,
48 * but we simply ignore the extras. */
49 vec_u8 pixl = vec_ld(0, pixels);
50 vec_u8 pixr = vec_ld(7, pixels);
51 vec_u8 bytes = vec_perm(pixl, pixr, perm);
52
53 // Convert the bytes into shorts.
54 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
55
56 // Save the data to the block, we assume the block is 16-byte aligned.
57 vec_st(shorts, i * 16, (vec_s16 *)block);
58
59 pixels += line_size;
60 }
61 }
62
63 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
64 const uint8_t *s2, int stride)
65 {
66 int i;
67 vec_u8 perm1 = vec_lvsl(0, s1);
68 vec_u8 perm2 = vec_lvsl(0, s2);
69 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
70 vec_s16 shorts1, shorts2;
71
72 for (i = 0; i < 4; i++) {
73 /* Read potentially unaligned pixels.
74 * We're reading 16 pixels, and actually only want 8,
75 * but we simply ignore the extras. */
76 vec_u8 pixl = vec_ld(0, s1);
77 vec_u8 pixr = vec_ld(15, s1);
78 vec_u8 bytes = vec_perm(pixl, pixr, perm1);
79
80 // Convert the bytes into shorts.
81 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
82
83 // Do the same for the second block of pixels.
84 pixl = vec_ld(0, s2);
85 pixr = vec_ld(15, s2);
86 bytes = vec_perm(pixl, pixr, perm2);
87
88 // Convert the bytes into shorts.
89 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
90
91 // Do the subtraction.
92 shorts1 = vec_sub(shorts1, shorts2);
93
94 // Save the data to the block, we assume the block is 16-byte aligned.
95 vec_st(shorts1, 0, (vec_s16 *)block);
96
97 s1 += stride;
98 s2 += stride;
99 block += 8;
100
101 /* The code below is a copy of the code above...
102 * This is a manual unroll. */
103
104 /* Read potentially unaligned pixels.
105 * We're reading 16 pixels, and actually only want 8,
106 * but we simply ignore the extras. */
107 pixl = vec_ld(0, s1);
108 pixr = vec_ld(15, s1);
109 bytes = vec_perm(pixl, pixr, perm1);
110
111 // Convert the bytes into shorts.
112 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
113
114 // Do the same for the second block of pixels.
115 pixl = vec_ld(0, s2);
116 pixr = vec_ld(15, s2);
117 bytes = vec_perm(pixl, pixr, perm2);
118
119 // Convert the bytes into shorts.
120 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
121
122 // Do the subtraction.
123 shorts1 = vec_sub(shorts1, shorts2);
124
125 // Save the data to the block, we assume the block is 16-byte aligned.
126 vec_st(shorts1, 0, (vec_s16 *)block);
127
128 s1 += stride;
129 s2 += stride;
130 block += 8;
131 }
132 }
133
134 #endif /* HAVE_ALTIVEC */
135
136 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
137 AVCodecContext *avctx,
138 unsigned high_bit_depth)
139 {
140 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
141 if (!PPC_ALTIVEC(av_get_cpu_flags()))
142 return;
143
144 c->diff_pixels = diff_pixels_altivec;
145
146 if (!high_bit_depth) {
147 c->get_pixels = get_pixels_altivec;
148 }
149 #endif /* HAVE_ALTIVEC */
150 }