Commit | Line | Data |
---|---|---|
241807f3 LB |
1 | /* |
2 | * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> | |
3 | * | |
b78e7197 DB |
4 | * This file is part of FFmpeg. |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
241807f3 LB |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either | |
b78e7197 | 9 | * version 2.1 of the License, or (at your option) any later version. |
241807f3 | 10 | * |
b78e7197 | 11 | * FFmpeg is distributed in the hope that it will be useful, |
241807f3 LB |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
b78e7197 | 17 | * License along with FFmpeg; if not, write to the Free Software |
241807f3 LB |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ | |
20 | ||
21 | #include "../dsputil.h" | |
22 | ||
23 | #include "gcc_fixes.h" | |
24 | ||
25 | #include "dsputil_altivec.h" | |
26 | ||
27 | static void vector_fmul_altivec(float *dst, const float *src, int len) | |
28 | { | |
29 | int i; | |
30 | vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); | |
31 | for(i=0; i<len-7; i+=8) { | |
32 | d0 = vec_ld(0, dst+i); | |
33 | s = vec_ld(0, src+i); | |
34 | d1 = vec_ld(16, dst+i); | |
35 | d0 = vec_madd(d0, s, zero); | |
36 | d1 = vec_madd(d1, vec_ld(16,src+i), zero); | |
37 | vec_st(d0, 0, dst+i); | |
38 | vec_st(d1, 16, dst+i); | |
39 | } | |
40 | } | |
41 | ||
42 | static void vector_fmul_reverse_altivec(float *dst, const float *src0, | |
43 | const float *src1, int len) | |
44 | { | |
45 | int i; | |
46 | vector float d, s0, s1, h0, l0, | |
47 | s2, s3, zero = (vector float)vec_splat_u32(0); | |
48 | src1 += len-4; | |
49 | for(i=0; i<len-7; i+=8) { | |
50 | s1 = vec_ld(0, src1-i); // [a,b,c,d] | |
51 | s0 = vec_ld(0, src0+i); | |
52 | l0 = vec_mergel(s1, s1); // [c,c,d,d] | |
53 | s3 = vec_ld(-16, src1-i); | |
54 | h0 = vec_mergeh(s1, s1); // [a,a,b,b] | |
55 | s2 = vec_ld(16, src0+i); | |
56 | s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b] | |
57 | vec_mergeh(l0,h0)); // [c,a,c,a] | |
58 | // [d,c,b,a] | |
59 | l0 = vec_mergel(s3, s3); | |
60 | d = vec_madd(s0, s1, zero); | |
61 | h0 = vec_mergeh(s3, s3); | |
62 | vec_st(d, 0, dst+i); | |
63 | s3 = vec_mergeh(vec_mergel(l0,h0), | |
64 | vec_mergeh(l0,h0)); | |
65 | d = vec_madd(s2, s3, zero); | |
66 | vec_st(d, 16, dst+i); | |
67 | } | |
68 | } | |
69 | ||
70 | static void vector_fmul_add_add_altivec(float *dst, const float *src0, | |
71 | const float *src1, const float *src2, | |
72 | int src3, int len, int step) | |
73 | { | |
74 | int i; | |
75 | vector float d, s0, s1, s2, t0, t1, edges; | |
76 | vector unsigned char align = vec_lvsr(0,dst), | |
77 | mask = vec_lvsl(0, dst); | |
78 | ||
241807f3 LB |
79 | #if 0 //FIXME: there is still something wrong |
80 | if (step == 2) { | |
81 | int y; | |
82 | vector float d0, d1, s3, t2; | |
83 | vector unsigned int sel = | |
84 | vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0)); | |
85 | t1 = vec_ld(16, dst); | |
86 | for (i=0,y=0; i<len-3; i+=4,y+=8) { | |
87 | ||
88 | s0 = vec_ld(0,src0+i); | |
89 | s1 = vec_ld(0,src1+i); | |
90 | s2 = vec_ld(0,src2+i); | |
91 | ||
92 | // t0 = vec_ld(0, dst+y); //[x x x|a] | |
93 | // t1 = vec_ld(16, dst+y); //[b c d|e] | |
94 | t2 = vec_ld(31, dst+y); //[f g h|x] | |
95 | ||
96 | d = vec_madd(s0,s1,s2); // [A B C D] | |
97 | ||
98 | // [A A B B] | |
99 | ||
100 | // [C C D D] | |
101 | ||
102 | d0 = vec_perm(t0, t1, mask); // [a b c d] | |
103 | ||
104 | d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d] | |
105 | ||
106 | edges = vec_perm(t1, t0, mask); | |
107 | ||
108 | t0 = vec_perm(edges, d0, align); // [x x x|A] | |
109 | ||
110 | t1 = vec_perm(d0, edges, align); // [b B d|e] | |
111 | ||
112 | vec_stl(t0, 0, dst+y); | |
113 | ||
114 | d1 = vec_perm(t1, t2, mask); // [e f g h] | |
115 | ||
116 | d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h] | |
117 | ||
118 | edges = vec_perm(t2, t1, mask); | |
119 | ||
120 | t1 = vec_perm(edges, d1, align); // [b B d|C] | |
121 | ||
122 | t2 = vec_perm(d1, edges, align); // [f D h|x] | |
123 | ||
124 | vec_stl(t1, 16, dst+y); | |
125 | ||
126 | t0 = t1; | |
127 | ||
128 | vec_stl(t2, 31, dst+y); | |
129 | ||
130 | t1 = t2; | |
131 | } | |
132 | } else | |
133 | #endif | |
134 | if (step == 1 && src3 == 0) | |
135 | for (i=0; i<len-3; i+=4) { | |
6847e61a | 136 | t0 = vec_ld(0, dst+i); |
241807f3 LB |
137 | t1 = vec_ld(15, dst+i); |
138 | s0 = vec_ld(0, src0+i); | |
139 | s1 = vec_ld(0, src1+i); | |
140 | s2 = vec_ld(0, src2+i); | |
141 | edges = vec_perm(t1 ,t0, mask); | |
142 | d = vec_madd(s0,s1,s2); | |
27303c8a LB |
143 | t1 = vec_perm(d, edges, align); |
144 | t0 = vec_perm(edges, d, align); | |
241807f3 | 145 | vec_st(t1, 15, dst+i); |
27303c8a | 146 | vec_st(t0, 0, dst+i); |
241807f3 LB |
147 | } |
148 | else | |
149 | ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); | |
150 | } | |
151 | ||
152 | void float_to_int16_altivec(int16_t *dst, const float *src, int len) | |
153 | { | |
154 | int i; | |
155 | vector float s0, s1; | |
156 | vector signed int t0, t1; | |
157 | vector signed short d0, d1, d; | |
158 | vector unsigned char align; | |
159 | if(((long)dst)&15) //FIXME | |
160 | for(i=0; i<len-7; i+=8) { | |
161 | s0 = vec_ld(0, src+i); | |
162 | s1 = vec_ld(16, src+i); | |
163 | t0 = vec_cts(s0, 0); | |
164 | d0 = vec_ld(0, dst+i); | |
165 | t1 = vec_cts(s1, 0); | |
166 | d1 = vec_ld(15, dst+i); | |
167 | d = vec_packs(t0,t1); | |
168 | d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); | |
169 | align = vec_lvsr(0, dst+i); | |
99d239b3 LB |
170 | d0 = vec_perm(d1, d, align); |
171 | d1 = vec_perm(d, d1, align); | |
241807f3 LB |
172 | vec_st(d0, 0, dst+i); |
173 | vec_st(d1,15, dst+i); | |
174 | } | |
175 | else | |
176 | for(i=0; i<len-7; i+=8) { | |
177 | s0 = vec_ld(0, src+i); | |
178 | s1 = vec_ld(16, src+i); | |
179 | t0 = vec_cts(s0, 0); | |
180 | t1 = vec_cts(s1, 0); | |
181 | d = vec_packs(t0,t1); | |
182 | vec_st(d, 0, dst+i); | |
183 | } | |
184 | } | |
185 | ||
186 | void float_init_altivec(DSPContext* c, AVCodecContext *avctx) | |
187 | { | |
188 | c->vector_fmul = vector_fmul_altivec; | |
189 | c->vector_fmul_reverse = vector_fmul_reverse_altivec; | |
190 | c->vector_fmul_add_add = vector_fmul_add_add_altivec; | |
191 | if(!(avctx->flags & CODEC_FLAG_BITEXACT)) | |
192 | c->float_to_int16 = float_to_int16_altivec; | |
193 | } |