cpu detection: avoid a signed overflow
[libav.git] / libavutil / x86 / cpu.c
CommitLineData
04d7f601
DB
1/*
2 * CPU detection code, extracted from mmx.h
3 * (c)1997-99 by H. Dietz and R. Fisher
4 * Converted to C and improved by Fabrice Bellard.
5 *
2912e87a 6 * This file is part of Libav.
b78e7197 7 *
2912e87a 8 * Libav is free software; you can redistribute it and/or
04d7f601
DB
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
b78e7197 11 * version 2.1 of the License, or (at your option) any later version.
04d7f601 12 *
2912e87a 13 * Libav is distributed in the hope that it will be useful,
04d7f601
DB
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
2912e87a 19 * License along with Libav; if not, write to the Free Software
04d7f601
DB
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
de6d9b64
FB
22
23#include <stdlib.h>
65d45cea 24#include <string.h>
245976da 25#include "libavutil/x86_cpu.h"
c6c98d08 26#include "libavutil/cpu.h"
ade6e7f3 27
1d20b11a
NK
/*
 * Run the CPUID instruction for leaf `index` and store the four result
 * registers in the lvalues eax, ebx, ecx and edx.
 *
 * ebx saving is necessary for PIC; gcc seems unable to see it alone.
 * REG_b is therefore copied into REG_S before CPUID and swapped back
 * afterwards, so the ebx result is handed to the caller through the "S"
 * operand while REG_b itself ends up unchanged.
 * (REG_b and REG_S are register-name strings supplied by an included header.)
 *
 * The expansion intentionally carries no trailing semicolon: the caller
 * writes `cpuid(...);`, which then forms exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define cpuid(index,eax,ebx,ecx,edx)\
    __asm__ volatile\
        ("mov %%"REG_b", %%"REG_S"\n\t"\
         "cpuid\n\t"\
         "xchg %%"REG_b", %%"REG_S\
         : "=a" (eax), "=S" (ebx),\
           "=c" (ecx), "=d" (edx)\
         : "0" (index))
de6d9b64 37
/*
 * Emit the raw instruction bytes 0x0f 0x01 0xd0 with `index` loaded into
 * ecx, and store the resulting eax and edx values in the given lvalues.
 * NOTE(review): the bytes are presumably the hand-encoded XGETBV opcode,
 * spelled out for assemblers that lack the mnemonic -- confirm.
 */
#define xgetbv(index,eax,edx) \
    __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
87f1355f 40
de6d9b64 41/* Function to test if multimedia instructions are supported... */
9275438a 42int ff_get_cpu_flags_x86(void)
de6d9b64 43{
e7ddb0cf 44 int rval = 0;
de6d9b64 45 int eax, ebx, ecx, edx;
e42a152b 46 int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
6526976f
RB
47 int family=0, model=0;
48 union { int i[3]; char c[12]; } vendor;
115329f1 49
d05f808d
ZM
50#if ARCH_X86_32
51 x86_reg a, c;
be449fca 52 __asm__ volatile (
80465c7e
DB
53 /* See if CPUID instruction is supported ... */
54 /* ... Get copies of EFLAGS into eax and ecx */
d05f808d 55 "pushfl\n\t"
80465c7e
DB
56 "pop %0\n\t"
57 "mov %0, %1\n\t"
115329f1 58
80465c7e
DB
59 /* ... Toggle the ID bit in one copy and store */
60 /* to the EFLAGS reg */
61 "xor $0x200000, %0\n\t"
62 "push %0\n\t"
d05f808d 63 "popfl\n\t"
115329f1 64
80465c7e 65 /* ... Get the (hopefully modified) EFLAGS */
d05f808d 66 "pushfl\n\t"
80465c7e
DB
67 "pop %0\n\t"
68 : "=a" (a), "=c" (c)
69 :
70 : "cc"
71 );
115329f1 72
8b4d077f 73 if (a == c)
de6d9b64 74 return 0; /* CPUID not supported */
d05f808d 75#endif
de6d9b64 76
6526976f 77 cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
e42a152b
MN
78
79 if(max_std_level >= 1){
80 cpuid(1, eax, ebx, ecx, std_caps);
6526976f
RB
81 family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
82 model = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
e42a152b 83 if (std_caps & (1<<23))
7160bb71 84 rval |= AV_CPU_FLAG_MMX;
115329f1 85 if (std_caps & (1<<25))
7160bb71 86 rval |= AV_CPU_FLAG_MMX2
b250f9c6 87#if HAVE_SSE
7160bb71 88 | AV_CPU_FLAG_SSE;
115329f1 89 if (std_caps & (1<<26))
7160bb71 90 rval |= AV_CPU_FLAG_SSE2;
392f6da8 91 if (ecx & 1)
7160bb71 92 rval |= AV_CPU_FLAG_SSE3;
5a5c770d 93 if (ecx & 0x00000200 )
7160bb71 94 rval |= AV_CPU_FLAG_SSSE3;
710441c2 95 if (ecx & 0x00080000 )
7160bb71 96 rval |= AV_CPU_FLAG_SSE4;
710441c2 97 if (ecx & 0x00100000 )
7160bb71 98 rval |= AV_CPU_FLAG_SSE42;
87f1355f
MR
99#if HAVE_AVX
100 /* Check OXSAVE and AVX bits */
101 if ((ecx & 0x18000000) == 0x18000000) {
102 /* Check for OS support */
103 xgetbv(0, eax, edx);
104 if ((eax & 0x6) == 0x6)
105 rval |= AV_CPU_FLAG_AVX;
106 }
107#endif
1435e4cc
MN
108#endif
109 ;
e42a152b
MN
110 }
111
112 cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
113
114 if(max_ext_level >= 0x80000001){
115 cpuid(0x80000001, eax, ebx, ecx, ext_caps);
5938e021 116 if (ext_caps & (1U<<31))
7160bb71 117 rval |= AV_CPU_FLAG_3DNOW;
e42a152b 118 if (ext_caps & (1<<30))
7160bb71 119 rval |= AV_CPU_FLAG_3DNOWEXT;
e42a152b 120 if (ext_caps & (1<<23))
7160bb71 121 rval |= AV_CPU_FLAG_MMX;
392f6da8 122 if (ext_caps & (1<<22))
7160bb71 123 rval |= AV_CPU_FLAG_MMX2;
74b1f968
JR
124
125 /* Allow for selectively disabling SSE2 functions on AMD processors
126 with SSE2 support but not SSE4a. This includes Athlon64, some
127 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
128 than SSE2 often enough to utilize this special-case flag.
129 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
130 so that SSE2 is used unless explicitly disabled by checking
131 AV_CPU_FLAG_SSE2SLOW. */
132 if (!strncmp(vendor.c, "AuthenticAMD", 12) &&
133 rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) {
134 rval |= AV_CPU_FLAG_SSE2SLOW;
135 }
de6d9b64 136 }
392f6da8 137
eba586b0
JR
138 if (!strncmp(vendor.c, "GenuineIntel", 12)) {
139 if (family == 6 && (model == 9 || model == 13 || model == 14)) {
45ed8225
JR
140 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
141 * theoretically support sse2, but it's usually slower than mmx,
142 * so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled and
143 * AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless
144 * explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW. The same
145 * situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
146 if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
147 if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
eba586b0
JR
148 }
149 /* The Atom processor has SSSE3 support, which is useful in many cases,
150 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
151 * on the Atom, but is generally faster on other processors supporting
152 * SSSE3. This flag allows for selectively disabling certain SSSE3
153 * functions on the Atom. */
154 if (family == 6 && model == 28)
155 rval |= AV_CPU_FLAG_ATOM;
6526976f
RB
156 }
157
e42a152b 158 return rval;
de6d9b64 159}