dsputil: Split off pixel block routines into their own context
[libav.git] / libavcodec / ppc / pixblockdsp.c
CommitLineData
f46bb608
DB
1/*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include "config.h"
24#if HAVE_ALTIVEC_H
25#include <altivec.h>
26#endif
27
28#include "libavutil/attributes.h"
29#include "libavutil/cpu.h"
30#include "libavutil/ppc/cpu.h"
31#include "libavutil/ppc/types_altivec.h"
32#include "libavutil/ppc/util_altivec.h"
33#include "libavcodec/avcodec.h"
34#include "libavcodec/pixblockdsp.h"
35
36#if HAVE_ALTIVEC
37
38static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39 int line_size)
40{
41 int i;
42 vector unsigned char perm = vec_lvsl(0, pixels);
43 const vector unsigned char zero =
44 (const vector unsigned char) vec_splat_u8(0);
45
46 for (i = 0; i < 8; i++) {
47 /* Read potentially unaligned pixels.
48 * We're reading 16 pixels, and actually only want 8,
49 * but we simply ignore the extras. */
50 vector unsigned char pixl = vec_ld(0, pixels);
51 vector unsigned char pixr = vec_ld(7, pixels);
52 vector unsigned char bytes = vec_perm(pixl, pixr, perm);
53
54 // Convert the bytes into shorts.
55 vector signed short shorts = (vector signed short) vec_mergeh(zero,
56 bytes);
57
58 // Save the data to the block, we assume the block is 16-byte aligned.
59 vec_st(shorts, i * 16, (vector signed short *) block);
60
61 pixels += line_size;
62 }
63}
64
65static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
66 const uint8_t *s2, int stride)
67{
68 int i;
69 vector unsigned char perm1 = vec_lvsl(0, s1);
70 vector unsigned char perm2 = vec_lvsl(0, s2);
71 const vector unsigned char zero =
72 (const vector unsigned char) vec_splat_u8(0);
73 vector signed short shorts1, shorts2;
74
75 for (i = 0; i < 4; i++) {
76 /* Read potentially unaligned pixels.
77 * We're reading 16 pixels, and actually only want 8,
78 * but we simply ignore the extras. */
79 vector unsigned char pixl = vec_ld(0, s1);
80 vector unsigned char pixr = vec_ld(15, s1);
81 vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
82
83 // Convert the bytes into shorts.
84 shorts1 = (vector signed short) vec_mergeh(zero, bytes);
85
86 // Do the same for the second block of pixels.
87 pixl = vec_ld(0, s2);
88 pixr = vec_ld(15, s2);
89 bytes = vec_perm(pixl, pixr, perm2);
90
91 // Convert the bytes into shorts.
92 shorts2 = (vector signed short) vec_mergeh(zero, bytes);
93
94 // Do the subtraction.
95 shorts1 = vec_sub(shorts1, shorts2);
96
97 // Save the data to the block, we assume the block is 16-byte aligned.
98 vec_st(shorts1, 0, (vector signed short *) block);
99
100 s1 += stride;
101 s2 += stride;
102 block += 8;
103
104 /* The code below is a copy of the code above...
105 * This is a manual unroll. */
106
107 /* Read potentially unaligned pixels.
108 * We're reading 16 pixels, and actually only want 8,
109 * but we simply ignore the extras. */
110 pixl = vec_ld(0, s1);
111 pixr = vec_ld(15, s1);
112 bytes = vec_perm(pixl, pixr, perm1);
113
114 // Convert the bytes into shorts.
115 shorts1 = (vector signed short) vec_mergeh(zero, bytes);
116
117 // Do the same for the second block of pixels.
118 pixl = vec_ld(0, s2);
119 pixr = vec_ld(15, s2);
120 bytes = vec_perm(pixl, pixr, perm2);
121
122 // Convert the bytes into shorts.
123 shorts2 = (vector signed short) vec_mergeh(zero, bytes);
124
125 // Do the subtraction.
126 shorts1 = vec_sub(shorts1, shorts2);
127
128 // Save the data to the block, we assume the block is 16-byte aligned.
129 vec_st(shorts1, 0, (vector signed short *) block);
130
131 s1 += stride;
132 s2 += stride;
133 block += 8;
134 }
135}
136
137#endif /* HAVE_ALTIVEC */
138
139av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
140 AVCodecContext *avctx,
141 unsigned high_bit_depth)
142{
143#if HAVE_ALTIVEC
144 if (!PPC_ALTIVEC(av_get_cpu_flags()))
145 return;
146
147 c->diff_pixels = diff_pixels_altivec;
148
149 if (!high_bit_depth) {
150 c->get_pixels = get_pixels_altivec;
151 }
152#endif /* HAVE_ALTIVEC */
153}