a19b83d83c086d97b35e680351ebe3ac3c6ee9b9
[libav.git] / libavcodec / x86 / dsputil_init.c
1 /*
2 * This file is part of Libav.
3 *
4 * Libav is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * Libav is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with Libav; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "config.h"
20 #include "libavutil/attributes.h"
21 #include "libavutil/cpu.h"
22 #include "libavutil/x86/cpu.h"
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/dsputil.h"
25 #include "libavcodec/simple_idct.h"
26 #include "dsputil_x86.h"
27 #include "idct_xvid.h"
28
/*
 * Prototypes for routines that are only declared in this file; their
 * definitions live elsewhere.  Each one is installed into the DSPContext
 * below under the matching HAVE_*_EXTERNAL guard.
 */

/* scalarproduct_int16 candidates */
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1,
                                      const int16_t *v2,
                                      int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1,
                                    const int16_t *v2,
                                    int order);

/* bswap_buf candidates */
void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);

/* vector_clip_int32 candidates */
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
                              int32_t min, int32_t max,
                              unsigned int len);
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
                               int32_t min, int32_t max,
                               unsigned int len);
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
                                   int32_t min, int32_t max,
                                   unsigned int len);
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
                               int32_t min, int32_t max,
                               unsigned int len);
45
46 static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
47 int cpu_flags, unsigned high_bit_depth)
48 {
49 #if HAVE_MMX_INLINE
50 c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
51 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
52 c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
53
54 if (!high_bit_depth) {
55 c->draw_edges = ff_draw_edges_mmx;
56
57 switch (avctx->idct_algo) {
58 case FF_IDCT_AUTO:
59 case FF_IDCT_SIMPLEMMX:
60 c->idct_put = ff_simple_idct_put_mmx;
61 c->idct_add = ff_simple_idct_add_mmx;
62 c->idct = ff_simple_idct_mmx;
63 c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
64 break;
65 case FF_IDCT_XVIDMMX:
66 c->idct_put = ff_idct_xvid_mmx_put;
67 c->idct_add = ff_idct_xvid_mmx_add;
68 c->idct = ff_idct_xvid_mmx;
69 break;
70 }
71 }
72
73 c->gmc = ff_gmc_mmx;
74 #endif /* HAVE_MMX_INLINE */
75
76 #if HAVE_MMX_EXTERNAL
77 c->vector_clip_int32 = ff_vector_clip_int32_mmx;
78 #endif /* HAVE_MMX_EXTERNAL */
79 }
80
81 static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
82 int cpu_flags, unsigned high_bit_depth)
83 {
84 #if HAVE_MMXEXT_INLINE
85 if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
86 c->idct_put = ff_idct_xvid_mmxext_put;
87 c->idct_add = ff_idct_xvid_mmxext_add;
88 c->idct = ff_idct_xvid_mmxext;
89 }
90 #endif /* HAVE_MMXEXT_INLINE */
91
92 #if HAVE_MMXEXT_EXTERNAL
93 c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
94 #endif /* HAVE_MMXEXT_EXTERNAL */
95 }
96
97 static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
98 int cpu_flags, unsigned high_bit_depth)
99 {
100 #if HAVE_SSE_INLINE
101 c->vector_clipf = ff_vector_clipf_sse;
102 #endif /* HAVE_SSE_INLINE */
103 }
104
105 static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
106 int cpu_flags, unsigned high_bit_depth)
107 {
108 #if HAVE_SSE2_INLINE
109 if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
110 c->idct_put = ff_idct_xvid_sse2_put;
111 c->idct_add = ff_idct_xvid_sse2_add;
112 c->idct = ff_idct_xvid_sse2;
113 c->idct_permutation_type = FF_SSE2_IDCT_PERM;
114 }
115 #endif /* HAVE_SSE2_INLINE */
116
117 #if HAVE_SSE2_EXTERNAL
118 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
119 if (cpu_flags & AV_CPU_FLAG_ATOM) {
120 c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
121 } else {
122 c->vector_clip_int32 = ff_vector_clip_int32_sse2;
123 }
124 c->bswap_buf = ff_bswap32_buf_sse2;
125 #endif /* HAVE_SSE2_EXTERNAL */
126 }
127
128 static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
129 int cpu_flags, unsigned high_bit_depth)
130 {
131 #if HAVE_SSSE3_EXTERNAL
132 c->bswap_buf = ff_bswap32_buf_ssse3;
133 #endif /* HAVE_SSSE3_EXTERNAL */
134 }
135
136 static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
137 int cpu_flags, unsigned high_bit_depth)
138 {
139 #if HAVE_SSE4_EXTERNAL
140 c->vector_clip_int32 = ff_vector_clip_int32_sse4;
141 #endif /* HAVE_SSE4_EXTERNAL */
142 }
143
144 av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
145 unsigned high_bit_depth)
146 {
147 int cpu_flags = av_get_cpu_flags();
148
149 if (X86_MMX(cpu_flags))
150 dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
151
152 if (X86_MMXEXT(cpu_flags))
153 dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
154
155 if (X86_SSE(cpu_flags))
156 dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth);
157
158 if (X86_SSE2(cpu_flags))
159 dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);
160
161 if (EXTERNAL_SSSE3(cpu_flags))
162 dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);
163
164 if (EXTERNAL_SSE4(cpu_flags))
165 dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);
166
167 if (CONFIG_ENCODERS)
168 ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
169 }