Add libavresample
[libav.git] / libavresample / x86 / audio_convert.asm
1 ;******************************************************************************
2 ;* x86 optimized Format Conversion Utils
3 ;* Copyright (c) 2008 Loren Merritt
4 ;*
5 ;* This file is part of Libav.
6 ;*
7 ;* Libav is free software; you can redistribute it and/or
8 ;* modify it under the terms of the GNU Lesser General Public
9 ;* License as published by the Free Software Foundation; either
10 ;* version 2.1 of the License, or (at your option) any later version.
11 ;*
12 ;* Libav is distributed in the hope that it will be useful,
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;* Lesser General Public License for more details.
16 ;*
17 ;* You should have received a copy of the GNU Lesser General Public
18 ;* License along with Libav; if not, write to the Free Software
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 ;******************************************************************************
21
22 %include "x86inc.asm"
23 %include "x86util.asm"
24
25 SECTION_TEXT
26
27 ;-----------------------------------------------------------------------------
28 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
29 ; int channels);
30 ;-----------------------------------------------------------------------------
31
32 %macro CONV_FLTP_TO_FLT_6CH 0
; Emits one conv_fltp_to_flt_6ch function for whichever instruction set the
; preceding INIT_* line selected. The function reads six separate float
; arrays (src[0] .. src[5]) and writes their samples interleaved to dst:
;   dst = { s0[0], s1[0], s2[0], s3[0], s4[0], s5[0], s0[1], s1[1], ... }
;
; Properties visible from the code below:
;  - The 4th C argument (channels) is never read; six channels are hard-coded.
;  - Samples are consumed in steps of mmsize/4 per channel and the loop test
;    sits at the bottom, so one full step always runs and a len that is not
;    a multiple of the step is effectively rounded up.
;  - The SSE path stores with movaps, which requires dst to be 16-byte
;    aligned. The loads use mova; presumably that is x86inc's aligned-move
;    alias, in which case the six source arrays need the same alignment for
;    SSE -- TODO confirm against x86inc.asm.
;
; cglobal arguments (x86inc convention, not defined in this file): load 2
; arguments into registers, use 8 GPRs and 7 vector registers, and name the
; GPRs dst, src, src1..src5, len in that order.
33 cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
34 %if ARCH_X86_64
; Copy the 3rd argument (len) out of r2 before r2 is reused as src1.
35 mov lend, r2d
36 %else
; 32-bit build: the counter is not held in a register. lend is aliased to
; r2m, presumably x86inc's name for the in-memory slot of argument 2, so the
; "sub lend, ..." at the bottom of the loop updates len in place there.
37 %define lend dword r2m
38 %endif
; Fetch the channel pointers src[1] .. src[5] from the pointer array.
; gprsize is used as the size of one array entry (one pointer).
39 mov src1q, [srcq+1*gprsize]
40 mov src2q, [srcq+2*gprsize]
41 mov src3q, [srcq+3*gprsize]
42 mov src4q, [srcq+4*gprsize]
43 mov src5q, [srcq+5*gprsize]
; srcq stops pointing at the pointer array and becomes src[0] itself.
44 mov srcq, [srcq]
; Turn src1..src5 into byte offsets relative to src[0]. From here on
; [srcq+srcNq] addresses channel N, and a single "add srcq" per iteration
; advances all six channels at once.
45 sub src1q, srcq
46 sub src2q, srcq
47 sub src3q, srcq
48 sub src4q, srcq
49 sub src5q, srcq
50 .loop:
; Load mmsize bytes (mmsize/4 floats) from each of the six channels.
; Below, a..f denote channels 0..5 and the digit is the sample index
; within this step.
51 mova m0, [srcq ]
52 mova m1, [srcq+src1q]
53 mova m2, [srcq+src2q]
54 mova m3, [srcq+src3q]
55 mova m4, [srcq+src4q]
56 mova m5, [srcq+src5q]
57 %if cpuflag(sse)
; Pairwise interleave with m6 as scratch. Assuming SBUTTERFLYPS x, y, t
; leaves unpcklps(x,y) in x and unpckhps(x,y) in y (defined in x86util.asm,
; not shown here), the registers then hold:
;   m0 = a0 b0 a1 b1    m1 = a2 b2 a3 b3
;   m2 = c0 d0 c1 d1    m3 = c2 d2 c3 d3
;   m4 = e0 f0 e1 f1    m5 = e2 f2 e3 f3
58 SBUTTERFLYPS 0, 1, 6
59 SBUTTERFLYPS 2, 3, 6
60 SBUTTERFLYPS 4, 5, 6
61
; Assemble samples 0 and 1 of all six channels (12 floats, 48 bytes).
; m6 = e0 f0 e1 f1 (copy kept because m4 is about to be overwritten)
62 movaps m6, m4
; m4 = e0 f0 a1 b1 (low half of m4, high half of m0; assumes q3210 is the
; identity-order shuffle selector provided by x86inc)
63 shufps m4, m0, q3210
; m0 = a0 b0 c0 d0 (low half of m2 moved into the high half of m0)
64 movlhps m0, m2
; m6 = c1 d1 e1 f1 (high half of m2 moved into the low half of m6)
65 movhlps m6, m2
66 movaps [dstq ], m0
67 movaps [dstq+16], m4
68 movaps [dstq+32], m6
69
; Same pattern for samples 2 and 3, using the "high" registers m1/m3/m5.
; Stored result: a2 b2 c2 d2 | e2 f2 a3 b3 | c3 d3 e3 f3
70 movaps m6, m5
71 shufps m5, m1, q3210
72 movlhps m1, m3
73 movhlps m6, m3
74 movaps [dstq+48], m1
75 movaps [dstq+64], m5
76 movaps [dstq+80], m6
77 %else ; mmx
; 64-bit registers hold two floats each. Assuming SBUTTERFLY dq behaves
; like the PS variant above but on dwords (punpckldq/punpckhdq):
;   m0 = a0 b0   m1 = a1 b1
;   m2 = c0 d0   m3 = c1 d1
;   m4 = e0 f0   m5 = e1 f1
78 SBUTTERFLY dq, 0, 1, 6
79 SBUTTERFLY dq, 2, 3, 6
80 SBUTTERFLY dq, 4, 5, 6
81
; Sample 0 of all six channels, then sample 1 (12 floats, 48 bytes).
82 movq [dstq ], m0
83 movq [dstq+ 8], m2
84 movq [dstq+16], m4
85 movq [dstq+24], m1
86 movq [dstq+32], m3
87 movq [dstq+40], m5
88 %endif
; Advance by one step: mmsize bytes per source channel, six times that in
; the interleaved output, and mmsize/4 samples off the remaining count.
89 add srcq, mmsize
90 add dstq, mmsize*6
91 sub lend, mmsize/4
; Signed test on the result of the subtraction: repeat while samples remain.
92 jg .loop
93 %if mmsize == 8
; MMX build: emms resets the MMX/x87 tag state so that later x87
; floating-point code in the caller works correctly.
94 emms
95 RET
96 %else
97 REP_RET
98 %endif
99 %endmacro
100
; Instantiate the conversion routine once per instruction set.
; MMX build: takes the "%else ; mmx" branch of the macro and, since the
; macro tests mmsize == 8 for it, ends with emms.
; NOTE(review): x86inc presumably appends the cpu name to the symbol
; (..._mmx / ..._sse) -- confirm against x86inc.asm and the C init code.
101 INIT_MMX mmx
102 CONV_FLTP_TO_FLT_6CH
; SSE build: takes the cpuflag(sse) branch of the macro (movaps/shufps on
; four floats per register).
103 INIT_XMM sse
104 CONV_FLTP_TO_FLT_6CH