x86: check for AV_CPU_FLAG_AVXSLOW where useful
[libav.git] / libavresample / x86 / audio_convert_init.c
CommitLineData
c8af852b
JR
/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "config.h"
22#include "libavutil/cpu.h"
e0c6cce4 23#include "libavutil/x86/cpu.h"
c8af852b
JR
24#include "libavresample/audio_convert.h"
25
/* flat conversions */

/*
 * Prototypes for the flat converters: one source buffer, one destination
 * buffer and a length. Names follow ff_conv_<source fmt>_to_<dest fmt>_<isa>,
 * and the pointer types follow the formats (s16 -> int16_t, s32 -> int32_t,
 * flt -> float). The functions are defined outside this file.
 */

void ff_conv_s16_to_s32_sse2(int32_t *dst, const int16_t *src, int len);

void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len);
void ff_conv_s16_to_flt_sse4(float *dst, const int16_t *src, int len);

void ff_conv_s32_to_s16_mmx (int16_t *dst, const int32_t *src, int len);
void ff_conv_s32_to_s16_sse2(int16_t *dst, const int32_t *src, int len);

void ff_conv_s32_to_flt_sse2(float *dst, const int32_t *src, int len);
void ff_conv_s32_to_flt_avx (float *dst, const int32_t *src, int len);

void ff_conv_flt_to_s16_sse2(int16_t *dst, const float *src, int len);

void ff_conv_flt_to_s32_sse2(int32_t *dst, const float *src, int len);
void ff_conv_flt_to_s32_avx (int32_t *dst, const float *src, int len);
/* interleave conversions */

/*
 * Prototypes for the interleaving converters: src is an array of per-channel
 * buffers (planar formats carry a 'p' suffix: s16p, fltp) and dst is a single
 * buffer. The _2ch/_6ch part of the name is the channel count the routine is
 * registered for in ff_audio_convert_init_x86(). The functions are defined
 * outside this file.
 *
 * NOTE(review): ff_conv_fltp_to_flt_2ch_avx is declared here but is not
 * registered by ff_audio_convert_init_x86() in this file -- confirm whether
 * that is intentional.
 */

void ff_conv_s16p_to_s16_2ch_sse2(int16_t *dst, int16_t *const *src,
                                  int len, int channels);
void ff_conv_s16p_to_s16_2ch_avx (int16_t *dst, int16_t *const *src,
                                  int len, int channels);

void ff_conv_s16p_to_s16_6ch_sse2(int16_t *dst, int16_t *const *src,
                                  int len, int channels);
void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src,
                                      int len, int channels);
void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src,
                                  int len, int channels);

void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src,
                                  int len, int channels);
void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src,
                                  int len, int channels);

void ff_conv_s16p_to_flt_6ch_sse2 (float *dst, int16_t *const *src,
                                   int len, int channels);
void ff_conv_s16p_to_flt_6ch_ssse3(float *dst, int16_t *const *src,
                                   int len, int channels);
void ff_conv_s16p_to_flt_6ch_avx  (float *dst, int16_t *const *src,
                                   int len, int channels);

void ff_conv_fltp_to_s16_2ch_sse2 (int16_t *dst, float *const *src,
                                   int len, int channels);
void ff_conv_fltp_to_s16_2ch_ssse3(int16_t *dst, float *const *src,
                                   int len, int channels);

void ff_conv_fltp_to_s16_6ch_sse (int16_t *dst, float *const *src,
                                  int len, int channels);
void ff_conv_fltp_to_s16_6ch_sse2(int16_t *dst, float *const *src,
                                  int len, int channels);
void ff_conv_fltp_to_s16_6ch_avx (int16_t *dst, float *const *src,
                                  int len, int channels);

void ff_conv_fltp_to_flt_2ch_sse(float *dst, float *const *src, int len,
                                 int channels);
void ff_conv_fltp_to_flt_2ch_avx(float *dst, float *const *src, int len,
                                 int channels);

void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len,
                                  int channels);
void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len,
                                  int channels);
void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len,
                                  int channels);
/* deinterleave conversions */

/*
 * Prototypes for the deinterleaving converters: src is a single buffer and
 * dst is an array of per-channel buffers (planar formats carry a 'p' suffix:
 * s16p, fltp). The _2ch/_6ch part of the name is the channel count the
 * routine is registered for in ff_audio_convert_init_x86(). The functions are
 * defined outside this file.
 *
 * NOTE(review): ff_conv_s16_to_fltp_6ch_sse4 is declared here but is not
 * registered by ff_audio_convert_init_x86() in this file -- confirm whether
 * that is intentional.
 */

void ff_conv_s16_to_s16p_2ch_sse2(int16_t *const *dst, int16_t *src,
                                  int len, int channels);
void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src,
                                  int len, int channels);

void ff_conv_s16_to_s16p_6ch_sse2 (int16_t *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_s16p_6ch_avx  (int16_t *const *dst, int16_t *src,
                                   int len, int channels);

void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src,
                                  int len, int channels);
void ff_conv_s16_to_fltp_2ch_avx (float *const *dst, int16_t *src,
                                  int len, int channels);

void ff_conv_s16_to_fltp_6ch_sse2 (float *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_fltp_6ch_ssse3(float *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
                                   int len, int channels);
void ff_conv_s16_to_fltp_6ch_avx  (float *const *dst, int16_t *src,
                                   int len, int channels);

void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src,
                                  int len, int channels);
void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
                                  int len, int channels);

void ff_conv_flt_to_s16p_6ch_sse2 (int16_t *const *dst, float *src,
                                   int len, int channels);
void ff_conv_flt_to_s16p_6ch_ssse3(int16_t *const *dst, float *src,
                                   int len, int channels);
void ff_conv_flt_to_s16p_6ch_avx  (int16_t *const *dst, float *src,
                                   int len, int channels);

void ff_conv_flt_to_fltp_2ch_sse(float *const *dst, float *src, int len,
                                 int channels);
void ff_conv_flt_to_fltp_2ch_avx(float *const *dst, float *src, int len,
                                 int channels);

void ff_conv_flt_to_fltp_6ch_sse2(float *const *dst, float *src, int len,
                                  int channels);
void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len,
                                  int channels);
c8af852b
JR
146av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
147{
3ac7fa81 148 int cpu_flags = av_get_cpu_flags();
c8af852b 149
3ac7fa81 150 if (EXTERNAL_MMX(cpu_flags)) {
5904f25b
JR
151 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
152 0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
c8af852b
JR
153 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
154 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
155 }
3ac7fa81 156 if (EXTERNAL_SSE(cpu_flags)) {
d5b4e50c
JR
157 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
158 6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
b66e20d2
JR
159 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
160 2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse);
e07c9705
JR
161 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
162 2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
d5b4e50c 163 }
3ac7fa81
DB
164 if (EXTERNAL_SSE2(cpu_flags)) {
165 if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
5904f25b
JR
166 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
167 0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
13df7d2d
JR
168 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
169 6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2);
d5b4e50c
JR
170 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
171 6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
13df7d2d
JR
172 } else {
173 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
174 6, 1, 4, "SSE2SLOW", ff_conv_s16p_to_s16_6ch_sse2slow);
5904f25b 175 }
1168e29d
JR
176 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
177 0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
d721f67d
JR
178 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
179 0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
97ce1ba8
JR
180 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
181 0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
6c63cbfe
JR
182 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
183 0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
4e4dd717
JR
184 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
185 0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
c0e12535
JR
186 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
187 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2);
46f929ad
JR
188 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
189 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2);
90cc27f8
JR
190 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
191 6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2);
a58a0139
JR
192 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
193 2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2);
8eeffa8a
JR
194 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
195 2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
205ace88
JR
196 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
197 6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
91851a7b
JR
198 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
199 2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
6092dafb
JR
200 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
201 6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
31d0d718
JR
202 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
203 2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
5245c9f3
JR
204 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
205 6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2);
06e751a4
JR
206 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
207 6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
90cc27f8 208 }
3ac7fa81 209 if (EXTERNAL_SSSE3(cpu_flags)) {
90cc27f8
JR
210 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
211 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
a58a0139
JR
212 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
213 2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3);
8eeffa8a
JR
214 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
215 2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3);
205ace88
JR
216 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
217 6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3);
6092dafb
JR
218 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
219 6, 16, 4, "SSSE3", ff_conv_s16_to_fltp_6ch_ssse3);
5245c9f3
JR
220 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
221 6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
d721f67d 222 }
3ac7fa81 223 if (EXTERNAL_SSE4(cpu_flags)) {
d721f67d
JR
224 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
225 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
14a34d90
JR
226 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
227 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
1168e29d 228 }
d68c0538 229 if (EXTERNAL_AVX_FAST(cpu_flags)) {
97ce1ba8
JR
230 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
231 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
4e4dd717
JR
232 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
233 0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
d68c0538
JA
234 }
235 if (EXTERNAL_AVX(cpu_flags)) {
c0e12535
JR
236 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
237 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx);
13df7d2d
JR
238 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
239 6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx);
46f929ad
JR
240 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
241 2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx);
90cc27f8
JR
242 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
243 6, 16, 4, "AVX", ff_conv_s16p_to_flt_6ch_avx);
d5b4e50c
JR
244 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
245 6, 16, 4, "AVX", ff_conv_fltp_to_s16_6ch_avx);
14a34d90
JR
246 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
247 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
8eeffa8a
JR
248 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
249 2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
205ace88
JR
250 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
251 6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
91851a7b
JR
252 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
253 2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
6092dafb
JR
254 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
255 6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
31d0d718
JR
256 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
257 2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
5245c9f3
JR
258 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
259 6, 16, 4, "AVX", ff_conv_flt_to_s16p_6ch_avx);
e07c9705
JR
260 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
261 2, 16, 4, "AVX", ff_conv_flt_to_fltp_2ch_avx);
06e751a4
JR
262 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
263 6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
97ce1ba8 264 }
c8af852b 265}