x86: check for AV_CPU_FLAG_AVXSLOW where useful
[libav.git] / libavresample / x86 / audio_mix_init.c
CommitLineData
c8af852b
JR
/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "config.h"
22#include "libavutil/cpu.h"
e0c6cce4 23#include "libavutil/x86/cpu.h"
c8af852b
JR
24#include "libavresample/audio_mix.h"
25
b6649ab5
DB
/*
 * Prototypes for the dedicated 2 -> 1 and 1 -> 2 channel mixing routines.
 *
 * Only declarations appear here; the definitions are not part of this file
 * (presumably they come from the accompanying x86 assembly -- confirm
 * against the build system).
 *
 * Names follow ff_mix_<in>_to_<out>_<sample format>_<coeff type>_<cpu ext>,
 * which is how ff_audio_mix_init_x86() pairs each one with its
 * sample-format / coefficient-type / channel-count registration.
 */

/* 2 -> 1, planar float samples, float coefficients */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, float coefficients */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, Q8 coefficients (matrix is int16_t here) */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);

/* 1 -> 2, planar float samples, float coefficients */
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 1 -> 2, planar 16-bit samples, float coefficients */
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
f61ce90c 50
b6649ab5
DB
/*
 * Declare every <chan> -> 1 and <chan> -> 2 mixing routine for one input
 * channel count.
 *
 * For the given chan this expands to prototypes named
 *     ff_mix_<chan>_to_{1,2}_{fltp,s16p}_flt_<ext>
 * with <ext> being sse (fltp only), sse2 / sse4 (s16p only), and avx / fma4
 * (both sample formats).  fltp variants take float **src, s16p variants take
 * int16_t **src; the matrix is float ** in all of them.
 *
 * chan must be a single preprocessing token (it is pasted with ##).
 * The expansion already ends in ';', so invocations take no trailing
 * semicolon.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                     \
void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);

/* Emit the prototypes for 3 through 8 input channels. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)
107
/*
 * Register the <chan> -> 1 and <chan> -> 2 mixing routines for one input
 * channel count.
 *
 * The expansion is a sequence of if-statements, one per CPU extension, in
 * the order SSE, SSE2, SSE4, AVX, FMA4.  Each one calls
 * ff_audio_mix_set_func() for the routines of that extension.  The order is
 * significant if a later registration replaces an earlier one for the same
 * format/channel combination (presumably the case -- confirm in
 * ff_audio_mix_set_func()).
 *
 * Requirements at the expansion site:
 *   - `am` and `cpu_flags` must be in scope;
 *   - chan must be a single preprocessing token (it is pasted with ##);
 *   - no trailing semicolon is needed, and because the expansion is several
 *     statements it must not be used as the unbraced body of an if/else/loop.
 *
 * The two numeric arguments that follow the channel counts are the values
 * named ptr_align / smp_align below.  For the fltp AVX and FMA4 routines
 * they are 32 / 8, lowered to 16 / 4 when ARCH_X86_32 is set or chan >= 6.
 * NOTE(review): this condition presumably has to mirror the vector width the
 * assembly versions were built with -- confirm against the assembly source.
 *
 * NOTE(review): the fltp AVX entries here are gated by EXTERNAL_AVX, while
 * ff_audio_mix_init_x86() gates its 32-byte-aligned fltp AVX entries with
 * EXTERNAL_AVX_FAST; confirm whether that difference is intentional.
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(cpu_flags)) {                                          \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_sse);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_sse);       \
    }                                                                       \
    if (EXTERNAL_SSE2(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse2);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse2);      \
    }                                                                       \
    if (EXTERNAL_SSE4(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse4);      \
    }                                                                       \
    if (EXTERNAL_AVX(cpu_flags)) {                                          \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_1_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_2_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_1_s16p_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_2_s16p_flt_avx);       \
    }                                                                       \
    if (EXTERNAL_FMA4(cpu_flags)) {                                         \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_fma4);      \
    }
173
c8af852b
JR
174av_cold void ff_audio_mix_init_x86(AudioMix *am)
175{
3ac7fa81 176 int cpu_flags = av_get_cpu_flags();
c8af852b 177
3ac7fa81 178 if (EXTERNAL_SSE(cpu_flags)) {
c8af852b
JR
179 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
180 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
29f7490c
JR
181 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
182 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
c8af852b 183 }
3ac7fa81 184 if (EXTERNAL_SSE2(cpu_flags)) {
c140fb2c
JR
185 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
186 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
b75726cb
JR
187 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
188 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
f61ce90c
JR
189 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
190 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
c140fb2c 191 }
3ac7fa81 192 if (EXTERNAL_SSE4(cpu_flags)) {
c140fb2c
JR
193 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
194 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
f61ce90c
JR
195 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
196 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
c140fb2c 197 }
d68c0538 198 if (EXTERNAL_AVX_FAST(cpu_flags)) {
c8af852b
JR
199 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
200 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
29f7490c
JR
201 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
202 1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
d68c0538
JA
203 }
204 if (EXTERNAL_AVX(cpu_flags)) {
f61ce90c
JR
205 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
206 1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
c8af852b 207 }
2f096bb1
JR
208
209 SET_MIX_3_8_TO_1_2(3)
210 SET_MIX_3_8_TO_1_2(4)
211 SET_MIX_3_8_TO_1_2(5)
212 SET_MIX_3_8_TO_1_2(6)
213 SET_MIX_3_8_TO_1_2(7)
214 SET_MIX_3_8_TO_1_2(8)
c8af852b 215}