Commit | Line | Data |
---|---|---|
8bc67ec2 HG |
1 | /* |
2 | * Assembly testing and benchmarking tool | |
3 | * Copyright (c) 2015 Henrik Gramner | |
4 | * Copyright (c) 2008 Loren Merritt | |
5 | * | |
6 | * This file is part of Libav. | |
7 | * | |
8 | * Libav is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * Libav is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License along | |
19 | * with Libav; if not, write to the Free Software Foundation, Inc., | |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
22 | ||
23 | #include <stdarg.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include "checkasm.h" | |
28 | #include "libavutil/common.h" | |
29 | #include "libavutil/cpu.h" | |
9d218d57 | 30 | #include "libavutil/intfloat.h" |
8bc67ec2 HG |
31 | #include "libavutil/random_seed.h" |
32 | ||
bf0cef5c MS |
33 | #if HAVE_IO_H |
34 | #include <io.h> | |
35 | #endif | |
36 | ||
8bc67ec2 HG |
37 | #if HAVE_SETCONSOLETEXTATTRIBUTE |
38 | #include <windows.h> | |
39 | #define COLOR_RED FOREGROUND_RED | |
40 | #define COLOR_GREEN FOREGROUND_GREEN | |
41 | #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) | |
42 | #else | |
43 | #define COLOR_RED 1 | |
44 | #define COLOR_GREEN 2 | |
45 | #define COLOR_YELLOW 3 | |
46 | #endif | |
47 | ||
48 | #if HAVE_UNISTD_H | |
49 | #include <unistd.h> | |
50 | #endif | |
51 | ||
52 | #if !HAVE_ISATTY | |
53 | #define isatty(fd) 1 | |
54 | #endif | |
55 | ||
26ec75ae MS |
56 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL |
57 | #include "libavutil/arm/cpu.h" | |
58 | ||
59 | void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp; | |
60 | #endif | |
61 | ||
/* Table of test groups to invoke. Each entry is compiled in only when the
 * corresponding component is enabled; the list ends with a NULL entry. */
static const struct {
    const char *name;   /* group name, used as a prefix in report lines */
    void (*func)(void); /* function that runs the checks of this group */
} tests[] = {
#if CONFIG_BSWAPDSP
    { "bswapdsp",     checkasm_check_bswapdsp },
#endif
#if CONFIG_DCA_DECODER
    { "dcadsp",       checkasm_check_dcadsp },
    { "synth_filter", checkasm_check_synth_filter },
#endif
#if CONFIG_FMTCONVERT
    { "fmtconvert",   checkasm_check_fmtconvert },
#endif
#if CONFIG_H264DSP
    { "h264dsp",      checkasm_check_h264dsp },
#endif
#if CONFIG_H264PRED
    { "h264pred",     checkasm_check_h264pred },
#endif
#if CONFIG_H264QPEL
    { "h264qpel",     checkasm_check_h264qpel },
#endif
#if CONFIG_HEVC_DECODER
    { "hevc_mc",      checkasm_check_hevc_mc },
    { "hevc_idct",    checkasm_check_hevc_idct },
#endif
#if CONFIG_V210_ENCODER
    { "v210enc",      checkasm_check_v210enc },
#endif
#if CONFIG_VP8DSP
    { "vp8dsp",       checkasm_check_vp8dsp },
#endif
#if CONFIG_VP9_DECODER
    { "vp9dsp",       checkasm_check_vp9dsp },
#endif
    { NULL }
};
101 | ||
/* Table of cpu flags to check, in the order they are enabled.
 * The list ends with an entry whose flag is 0. */
static const struct {
    const char *name;   /* heading printed before results for this flag */
    const char *suffix; /* appended to function names, see cpu_suffix() */
    int flag;           /* AV_CPU_FLAG_* bits enabled by this entry */
} cpus[] = {
#if   ARCH_AARCH64
    { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
#elif ARCH_ARM
    { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
    { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
    { "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
    { "VFP",      "vfp",      AV_CPU_FLAG_VFP },
    { "VFP_VM",   "vfp_vm",   AV_CPU_FLAG_VFP_VM },
    { "VFPV3",    "vfp3",     AV_CPU_FLAG_VFPV3 },
    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
#elif ARCH_PPC
    { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
    { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
    { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
#elif ARCH_X86
    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
#endif
    { NULL }
};
142 | ||
/* One registered implementation of a function; further implementations of
 * the same function are chained through 'next' */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version, NULL at the end of the list */
    void *func;                       /* the implementation itself; NULL while unused */
    int ok;                           /* set to 1 on registration, cleared on failure */
    int cpu;                          /* cpu flags active when it was registered */
    int iterations;                   /* accumulated benchmark iterations */
    uint64_t cycles;                  /* accumulated benchmark timer ticks */
} CheckasmFuncVersion;
151 | ||
152 | /* Binary search tree node */ | |
153 | typedef struct CheckasmFunc { | |
154 | struct CheckasmFunc *child[2]; | |
155 | CheckasmFuncVersion versions; | |
5405584b | 156 | uint8_t color; /* 0 = red, 1 = black */ |
8bc67ec2 HG |
157 | char name[1]; |
158 | } CheckasmFunc; | |
159 | ||
160 | /* Internal state */ | |
161 | static struct { | |
162 | CheckasmFunc *funcs; | |
163 | CheckasmFunc *current_func; | |
164 | CheckasmFuncVersion *current_func_ver; | |
65c14801 | 165 | const char *current_test_name; |
8bc67ec2 HG |
166 | const char *bench_pattern; |
167 | int bench_pattern_len; | |
168 | int num_checked; | |
169 | int num_failed; | |
170 | int nop_time; | |
171 | int cpu_flag; | |
172 | const char *cpu_flag_name; | |
173 | } state; | |
174 | ||
175 | /* PRNG state */ | |
176 | AVLFG checkasm_lfg; | |
177 | ||
9d218d57 JG |
178 | /* float compare support code */ |
179 | static int is_negative(union av_intfloat32 u) | |
180 | { | |
181 | return u.i >> 31; | |
182 | } | |
183 | ||
184 | int float_near_ulp(float a, float b, unsigned max_ulp) | |
185 | { | |
186 | union av_intfloat32 x, y; | |
187 | ||
188 | x.f = a; | |
189 | y.f = b; | |
190 | ||
191 | if (is_negative(x) != is_negative(y)) { | |
192 | // handle -0.0 == +0.0 | |
193 | return a == b; | |
194 | } | |
195 | ||
40ad05ba | 196 | if (llabs((int64_t)x.i - y.i) <= max_ulp) |
9d218d57 JG |
197 | return 1; |
198 | ||
199 | return 0; | |
200 | } | |
201 | ||
/* Apply float_near_ulp() to the first len elements of a and b.
 * Returns 1 if every pair is close enough, 0 at the first pair that is not. */
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
                         unsigned len)
{
    unsigned pos = 0;

    while (pos < len && float_near_ulp(a[pos], b[pos], max_ulp))
        pos++;

    /* The scan only reaches the end when no pair was rejected */
    return pos == len;
}
213 | ||
/* Check whether |a - b| is strictly smaller than eps.
 * Returns 1 if so, 0 otherwise. */
int float_near_abs_eps(float a, float b, float eps)
{
    float delta = a - b;

    delta = fabsf(delta);
    return delta < eps;
}
220 | ||
/* Apply float_near_abs_eps() to the first len elements of a and b.
 * Returns 1 if every pair is within eps, 0 at the first pair that is not. */
int float_near_abs_eps_array(const float *a, const float *b, float eps,
                             unsigned len)
{
    unsigned pos = 0;

    while (pos < len && float_near_abs_eps(a[pos], b[pos], eps))
        pos++;

    /* The scan only reaches the end when no pair was rejected */
    return pos == len;
}
232 | ||
/* Accept a pair of floats if it passes either the ULP test or the absolute
 * epsilon test. Returns 1 if accepted, 0 otherwise. */
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
{
    /* The ULP test runs first; the epsilon test is only the fallback */
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps) != 0;
}
237 | ||
/* Apply float_near_abs_eps_ulp() to the first len elements of a and b.
 * Returns 1 if every pair is accepted, 0 at the first pair that is not. */
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
                                 unsigned max_ulp, unsigned len)
{
    unsigned pos = 0;

    while (pos < len && float_near_abs_eps_ulp(a[pos], b[pos], eps, max_ulp))
        pos++;

    /* The scan only reaches the end when no pair was rejected */
    return pos == len;
}
249 | ||
8bc67ec2 HG |
/* printf-style output to stderr, wrapped in the requested color when the
 * terminal supports it. Color support is probed on the first call only. */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1; /* -1 = not probed yet, 0 = plain, 1 = colored */
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;
        con = GetStdHandle(STD_ERROR_HANDLE);
        use_color = con && con != INVALID_HANDLE_VALUE &&
                    GetConsoleScreenBufferInfo(con, &con_info);
        /* Remember the original attributes so they can be restored later */
        if (use_color)
            org_attributes = con_info.wAttributes;
    }
    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        /* Color needs a TERM other than "dumb" and stderr being a tty */
        const char *term = getenv("TERM");
        use_color = term && strcmp(term, "dumb") && isatty(2);
    }
    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the default appearance */
#if HAVE_SETCONSOLETEXTATTRIBUTE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
292 | ||
293 | /* Deallocate a tree */ | |
294 | static void destroy_func_tree(CheckasmFunc *f) | |
295 | { | |
296 | if (f) { | |
297 | CheckasmFuncVersion *v = f->versions.next; | |
298 | while (v) { | |
299 | CheckasmFuncVersion *next = v->next; | |
300 | free(v); | |
301 | v = next; | |
302 | } | |
303 | ||
304 | destroy_func_tree(f->child[0]); | |
305 | destroy_func_tree(f->child[1]); | |
306 | free(f); | |
307 | } | |
308 | } | |
309 | ||
310 | /* Allocate a zero-initialized block, clean up and exit on failure */ | |
311 | static void *checkasm_malloc(size_t size) | |
312 | { | |
313 | void *ptr = calloc(1, size); | |
314 | if (!ptr) { | |
315 | fprintf(stderr, "checkasm: malloc failed\n"); | |
316 | destroy_func_tree(state.funcs); | |
317 | exit(1); | |
318 | } | |
319 | return ptr; | |
320 | } | |
321 | ||
322 | /* Get the suffix of the specified cpu flag */ | |
323 | static const char *cpu_suffix(int cpu) | |
324 | { | |
325 | int i = FF_ARRAY_ELEMS(cpus); | |
326 | ||
327 | while (--i >= 0) | |
328 | if (cpu & cpus[i].flag) | |
329 | return cpus[i].suffix; | |
330 | ||
331 | return "c"; | |
332 | } | |
333 | ||
334 | #ifdef AV_READ_TIME | |
/* qsort() comparator for uint16_t values, ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t *lhs = a;
    const uint16_t *rhs = b;

    return (int)*lhs - (int)*rhs;
}
339 | ||
340 | /* Measure the overhead of the timing code (in decicycles) */ | |
341 | static int measure_nop_time(void) | |
342 | { | |
343 | uint16_t nops[10000]; | |
344 | int i, nop_sum = 0; | |
345 | ||
346 | for (i = 0; i < 10000; i++) { | |
347 | uint64_t t = AV_READ_TIME(); | |
348 | nops[i] = AV_READ_TIME() - t; | |
349 | } | |
350 | ||
351 | qsort(nops, 10000, sizeof(uint16_t), cmp_nop); | |
352 | for (i = 2500; i < 7500; i++) | |
353 | nop_sum += nops[i]; | |
354 | ||
355 | return nop_sum / 500; | |
356 | } | |
357 | ||
358 | /* Print benchmark results */ | |
359 | static void print_benchs(CheckasmFunc *f) | |
360 | { | |
361 | if (f) { | |
362 | print_benchs(f->child[0]); | |
363 | ||
364 | /* Only print functions with at least one assembly version */ | |
365 | if (f->versions.cpu || f->versions.next) { | |
366 | CheckasmFuncVersion *v = &f->versions; | |
367 | do { | |
368 | if (v->iterations) { | |
369 | int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4; | |
370 | printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); | |
371 | } | |
372 | } while ((v = v->next)); | |
373 | } | |
374 | ||
375 | print_benchs(f->child[1]); | |
376 | } | |
377 | } | |
378 | #endif | |
379 | ||
/* Compare two function names byte by byte, except that a difference inside
 * a run of digits is decided by which number has more digits, so that
 * numbers keep their natural order. Returns <0, 0 or >0 like strcmp(). */
static int cmp_func_names(const char *a, const char *b)
{
    const char *first = a;
    int ascii_diff, digit_diff;

    /* Skip the common prefix; ascii_diff ends up as the difference of the
     * first mismatching bytes, or 0 if the strings are identical */
    while (!(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a) {
        a++;
        b++;
    }

    /* Skip digits that both strings still have at the same positions */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    /* Not inside a number: plain byte order decides */
    if (a == first || !av_isdigit(a[-1]))
        return ascii_diff;

    /* Inside a number: if exactly one side has digits left, the digit test decides */
    digit_diff = av_isdigit(*a) - av_isdigit(*b);
    return digit_diff ? digit_diff : ascii_diff;
}
394 | ||
5405584b HG |
395 | /* Perform a tree rotation in the specified direction and return the new root */ |
396 | static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) | |
397 | { | |
398 | CheckasmFunc *r = f->child[dir^1]; | |
399 | f->child[dir^1] = r->child[dir]; | |
400 | r->child[dir] = f; | |
401 | r->color = f->color; | |
402 | f->color = 0; | |
403 | return r; | |
404 | } | |
405 | ||
406 | #define is_red(f) ((f) && !(f)->color) | |
407 | ||
408 | /* Balance a left-leaning red-black tree at the specified node */ | |
409 | static void balance_tree(CheckasmFunc **root) | |
410 | { | |
411 | CheckasmFunc *f = *root; | |
412 | ||
413 | if (is_red(f->child[0]) && is_red(f->child[1])) { | |
414 | f->color ^= 1; | |
415 | f->child[0]->color = f->child[1]->color = 1; | |
416 | } | |
417 | ||
418 | if (!is_red(f->child[0]) && is_red(f->child[1])) | |
419 | *root = rotate_tree(f, 0); /* Rotate left */ | |
420 | else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) | |
421 | *root = rotate_tree(f, 1); /* Rotate right */ | |
422 | } | |
423 | ||
8bc67ec2 | 424 | /* Get a node with the specified name, creating it if it doesn't exist */ |
5405584b | 425 | static CheckasmFunc *get_func(CheckasmFunc **root, const char *name) |
8bc67ec2 | 426 | { |
5405584b | 427 | CheckasmFunc *f = *root; |
8bc67ec2 | 428 | |
5405584b HG |
429 | if (f) { |
430 | /* Search the tree for a matching node */ | |
8bc67ec2 | 431 | int cmp = cmp_func_names(name, f->name); |
5405584b HG |
432 | if (cmp) { |
433 | f = get_func(&f->child[cmp > 0], name); | |
8bc67ec2 | 434 | |
5405584b HG |
435 | /* Rebalance the tree on the way up if a new node was inserted */ |
436 | if (!f->versions.func) | |
437 | balance_tree(root); | |
438 | } | |
439 | } else { | |
440 | /* Allocate and insert a new node into the tree */ | |
441 | int name_length = strlen(name); | |
442 | f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); | |
443 | memcpy(f->name, name, name_length + 1); | |
8bc67ec2 HG |
444 | } |
445 | ||
8bc67ec2 HG |
446 | return f; |
447 | } | |
448 | ||
449 | /* Perform tests and benchmarks for the specified cpu flag if supported by the host */ | |
450 | static void check_cpu_flag(const char *name, int flag) | |
451 | { | |
452 | int old_cpu_flag = state.cpu_flag; | |
453 | ||
454 | flag |= old_cpu_flag; | |
455 | av_set_cpu_flags_mask(flag); | |
456 | state.cpu_flag = av_get_cpu_flags(); | |
457 | ||
458 | if (!flag || state.cpu_flag != old_cpu_flag) { | |
459 | int i; | |
460 | ||
461 | state.cpu_flag_name = name; | |
65c14801 HG |
462 | for (i = 0; tests[i].func; i++) { |
463 | state.current_test_name = tests[i].name; | |
464 | tests[i].func(); | |
465 | } | |
8bc67ec2 HG |
466 | } |
467 | } | |
468 | ||
469 | /* Print the name of the current CPU flag, but only do it once */ | |
470 | static void print_cpu_name(void) | |
471 | { | |
472 | if (state.cpu_flag_name) { | |
473 | color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); | |
474 | state.cpu_flag_name = NULL; | |
475 | } | |
476 | } | |
477 | ||
478 | int main(int argc, char *argv[]) | |
479 | { | |
e89cef40 LB |
480 | unsigned int seed; |
481 | int i, ret = 0; | |
8bc67ec2 | 482 | |
26ec75ae MS |
483 | #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL |
484 | if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) | |
485 | checkasm_checked_call = checkasm_checked_call_vfp; | |
486 | #endif | |
487 | ||
65c14801 | 488 | if (!tests[0].func || !cpus[0].flag) { |
8bc67ec2 | 489 | fprintf(stderr, "checkasm: no tests to perform\n"); |
6cc4d3e9 | 490 | return 0; |
8bc67ec2 HG |
491 | } |
492 | ||
493 | if (argc > 1 && !strncmp(argv[1], "--bench", 7)) { | |
494 | #ifndef AV_READ_TIME | |
495 | fprintf(stderr, "checkasm: --bench is not supported on your system\n"); | |
496 | return 1; | |
497 | #endif | |
498 | if (argv[1][7] == '=') { | |
499 | state.bench_pattern = argv[1] + 8; | |
500 | state.bench_pattern_len = strlen(state.bench_pattern); | |
501 | } else | |
502 | state.bench_pattern = ""; | |
503 | ||
504 | argc--; | |
505 | argv++; | |
506 | } | |
507 | ||
e89cef40 | 508 | seed = (argc > 1) ? strtoul(argv[1], NULL, 10) : av_get_random_seed(); |
8bc67ec2 HG |
509 | fprintf(stderr, "checkasm: using random seed %u\n", seed); |
510 | av_lfg_init(&checkasm_lfg, seed); | |
511 | ||
512 | check_cpu_flag(NULL, 0); | |
513 | for (i = 0; cpus[i].flag; i++) | |
514 | check_cpu_flag(cpus[i].name, cpus[i].flag); | |
515 | ||
516 | if (state.num_failed) { | |
517 | fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); | |
518 | ret = 1; | |
519 | } else { | |
520 | fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); | |
521 | #ifdef AV_READ_TIME | |
522 | if (state.bench_pattern) { | |
523 | state.nop_time = measure_nop_time(); | |
524 | printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); | |
525 | print_benchs(state.funcs); | |
526 | } | |
527 | #endif | |
528 | } | |
529 | ||
530 | destroy_func_tree(state.funcs); | |
531 | return ret; | |
532 | } | |
533 | ||
534 | /* Decide whether or not the specified function needs to be tested and | |
535 | * allocate/initialize data structures if needed. Returns a pointer to a | |
536 | * reference function if the function should be tested, otherwise NULL */ | |
515b69f8 | 537 | void *checkasm_check_func(void *func, const char *name, ...) |
8bc67ec2 HG |
538 | { |
539 | char name_buf[256]; | |
515b69f8 | 540 | void *ref = func; |
8bc67ec2 HG |
541 | CheckasmFuncVersion *v; |
542 | int name_length; | |
543 | va_list arg; | |
544 | ||
545 | va_start(arg, name); | |
546 | name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); | |
547 | va_end(arg); | |
548 | ||
549 | if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) | |
550 | return NULL; | |
551 | ||
5405584b HG |
552 | state.current_func = get_func(&state.funcs, name_buf); |
553 | state.funcs->color = 1; | |
8bc67ec2 HG |
554 | v = &state.current_func->versions; |
555 | ||
556 | if (v->func) { | |
557 | CheckasmFuncVersion *prev; | |
558 | do { | |
559 | /* Only test functions that haven't already been tested */ | |
560 | if (v->func == func) | |
561 | return NULL; | |
562 | ||
563 | if (v->ok) | |
564 | ref = v->func; | |
565 | ||
566 | prev = v; | |
567 | } while ((v = v->next)); | |
568 | ||
569 | v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); | |
570 | } | |
571 | ||
572 | v->func = func; | |
573 | v->ok = 1; | |
574 | v->cpu = state.cpu_flag; | |
575 | state.current_func_ver = v; | |
576 | ||
577 | if (state.cpu_flag) | |
578 | state.num_checked++; | |
579 | ||
580 | return ref; | |
581 | } | |
582 | ||
583 | /* Decide whether or not the current function needs to be benchmarked */ | |
584 | int checkasm_bench_func(void) | |
585 | { | |
586 | return !state.num_failed && state.bench_pattern && | |
587 | !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); | |
588 | } | |
589 | ||
590 | /* Indicate that the current test has failed */ | |
591 | void checkasm_fail_func(const char *msg, ...) | |
592 | { | |
593 | if (state.current_func_ver->cpu && state.current_func_ver->ok) { | |
594 | va_list arg; | |
595 | ||
596 | print_cpu_name(); | |
597 | fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); | |
598 | va_start(arg, msg); | |
599 | vfprintf(stderr, msg, arg); | |
600 | va_end(arg); | |
601 | fprintf(stderr, ")\n"); | |
602 | ||
603 | state.current_func_ver->ok = 0; | |
604 | state.num_failed++; | |
605 | } | |
606 | } | |
607 | ||
608 | /* Update benchmark results of the current function */ | |
609 | void checkasm_update_bench(int iterations, uint64_t cycles) | |
610 | { | |
611 | state.current_func_ver->iterations += iterations; | |
612 | state.current_func_ver->cycles += cycles; | |
613 | } | |
614 | ||
615 | /* Print the outcome of all tests performed since the last time this function was called */ | |
616 | void checkasm_report(const char *name, ...) | |
617 | { | |
618 | static int prev_checked, prev_failed, max_length; | |
619 | ||
620 | if (state.num_checked > prev_checked) { | |
65c14801 HG |
621 | int pad_length = max_length + 4; |
622 | va_list arg; | |
8bc67ec2 | 623 | |
65c14801 HG |
624 | print_cpu_name(); |
625 | pad_length -= fprintf(stderr, " - %s.", state.current_test_name); | |
626 | va_start(arg, name); | |
627 | pad_length -= vfprintf(stderr, name, arg); | |
628 | va_end(arg); | |
629 | fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); | |
8bc67ec2 HG |
630 | |
631 | if (state.num_failed == prev_failed) | |
632 | color_printf(COLOR_GREEN, "OK"); | |
633 | else | |
634 | color_printf(COLOR_RED, "FAILED"); | |
635 | fprintf(stderr, "]\n"); | |
636 | ||
637 | prev_checked = state.num_checked; | |
638 | prev_failed = state.num_failed; | |
639 | } else if (!state.cpu_flag) { | |
8bc67ec2 | 640 | /* Calculate the amount of padding required to make the output vertically aligned */ |
65c14801 HG |
641 | int length = strlen(state.current_test_name); |
642 | va_list arg; | |
643 | ||
644 | va_start(arg, name); | |
645 | length += vsnprintf(NULL, 0, name, arg); | |
646 | va_end(arg); | |
8bc67ec2 HG |
647 | |
648 | if (length > max_length) | |
649 | max_length = length; | |
650 | } | |
651 | } |