diff options
Diffstat (limited to 'src/contrib/SDL-3.2.20/test/testautomation_intrinsics.c')
-rw-r--r-- | src/contrib/SDL-3.2.20/test/testautomation_intrinsics.c | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/src/contrib/SDL-3.2.20/test/testautomation_intrinsics.c b/src/contrib/SDL-3.2.20/test/testautomation_intrinsics.c new file mode 100644 index 0000000..8338d33 --- /dev/null +++ b/src/contrib/SDL-3.2.20/test/testautomation_intrinsics.c | |||
@@ -0,0 +1,702 @@ | |||
1 | /** | ||
2 | * Intrinsics test suite | ||
3 | */ | ||
4 | |||
5 | #ifdef HAVE_BUILD_CONFIG | ||
6 | /* Disable intrinsics that are unsupported by the current compiler */ | ||
7 | #include "SDL_build_config.h" | ||
8 | #endif | ||
9 | |||
10 | #include <SDL3/SDL.h> | ||
11 | #include <SDL3/SDL_intrin.h> | ||
12 | #include <SDL3/SDL_test.h> | ||
13 | #include "testautomation_suites.h" | ||
14 | |||
15 | // FIXME: missing tests for loongarch lsx/lasx | ||
16 | // FIXME: missing tests for powerpc altivec | ||
17 | |||
18 | /* ================= Test Case Implementation ================== */ | ||
19 | |||
20 | /* Helper functions */ | ||
21 | |||
22 | static int allocate_random_uint_arrays(Uint32 **dest, Uint32 **a, Uint32 **b, size_t *size) { | ||
23 | size_t i; | ||
24 | |||
25 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
26 | *dest = SDL_malloc(sizeof(Uint32) * *size); | ||
27 | *a = SDL_malloc(sizeof(Uint32) * *size); | ||
28 | *b = SDL_malloc(sizeof(Uint32) * *size); | ||
29 | |||
30 | if (!*dest || !*a || !*b) { | ||
31 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
32 | return -1; | ||
33 | } | ||
34 | |||
35 | for (i = 0; i < *size; ++i) { | ||
36 | (*a)[i] = SDLTest_RandomUint32(); | ||
37 | (*b)[i] = SDLTest_RandomUint32(); | ||
38 | } | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | static int allocate_random_float_arrays(float **dest, float **a, float **b, size_t *size) { | ||
43 | size_t i; | ||
44 | |||
45 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
46 | *dest = SDL_malloc(sizeof(float) * *size); | ||
47 | *a = SDL_malloc(sizeof(float) * *size); | ||
48 | *b = SDL_malloc(sizeof(float) * *size); | ||
49 | |||
50 | if (!*dest || !*a || !*b) { | ||
51 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
52 | return -1; | ||
53 | } | ||
54 | |||
55 | for (i = 0; i < *size; ++i) { | ||
56 | (*a)[i] = SDLTest_RandomUnitFloat(); | ||
57 | (*b)[i] = SDLTest_RandomUnitFloat(); | ||
58 | } | ||
59 | |||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static int allocate_random_double_arrays(double **dest, double **a, double **b, size_t *size) { | ||
64 | size_t i; | ||
65 | |||
66 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
67 | *dest = SDL_malloc(sizeof(double) * *size); | ||
68 | *a = SDL_malloc(sizeof(double) * *size); | ||
69 | *b = SDL_malloc(sizeof(double) * *size); | ||
70 | |||
71 | if (!*dest || !*a || !*b) { | ||
72 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
73 | return -1; | ||
74 | } | ||
75 | |||
76 | for (i = 0; i < *size; ++i) { | ||
77 | (*a)[i] = SDLTest_RandomUnitDouble(); | ||
78 | (*b)[i] = SDLTest_RandomUnitDouble(); | ||
79 | } | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static void free_arrays(void *dest, void *a, void *b) { | ||
85 | SDL_free(dest); | ||
86 | SDL_free(a); | ||
87 | SDL_free(b); | ||
88 | } | ||
89 | |||
90 | /** | ||
91 | * Verify element-wise addition of 2 int arrays. | ||
92 | */ | ||
93 | static void verify_uints_addition(const Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size, const char *desc) { | ||
94 | size_t i; | ||
95 | int all_good = 1; | ||
96 | |||
97 | for (i = 0; i < size; ++i) { | ||
98 | Uint32 expected = a[i] + b[i]; | ||
99 | if (dest[i] != expected) { | ||
100 | SDLTest_AssertCheck(false, "%" SDL_PRIs32 " + %" SDL_PRIs32 " = %" SDL_PRIs32 ", expected %" SDL_PRIs32 " ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
101 | a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc); | ||
102 | all_good = 0; | ||
103 | } | ||
104 | } | ||
105 | if (all_good) { | ||
106 | SDLTest_AssertCheck(true, "All int additions were correct (%s)", desc); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | /** | ||
111 | * Verify element-wise multiplication of 2 uint arrays. | ||
112 | */ | ||
113 | static void verify_uints_multiplication(const Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size, const char *desc) { | ||
114 | size_t i; | ||
115 | int all_good = 1; | ||
116 | |||
117 | for (i = 0; i < size; ++i) { | ||
118 | Uint32 expected = a[i] * b[i]; | ||
119 | if (dest[i] != expected) { | ||
120 | SDLTest_AssertCheck(false, "%" SDL_PRIu32 " * %" SDL_PRIu32 " = %" SDL_PRIu32 ", expected %" SDL_PRIu32 " ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
121 | a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc); | ||
122 | all_good = 0; | ||
123 | } | ||
124 | } | ||
125 | if (all_good) { | ||
126 | SDLTest_AssertCheck(true, "All int multiplication were correct (%s)", desc); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * Verify element-wise addition of 2 float arrays. | ||
132 | */ | ||
133 | static void verify_floats_addition(const float *dest, const float *a, const float *b, size_t size, const char *desc) { | ||
134 | size_t i; | ||
135 | int all_good = 1; | ||
136 | |||
137 | for (i = 0; i < size; ++i) { | ||
138 | float expected = a[i] + b[i]; | ||
139 | float abs_error = SDL_fabsf(dest[i] - expected); | ||
140 | if (abs_error > 1.0e-5f) { | ||
141 | SDLTest_AssertCheck(false, "%g + %g = %g, expected %g (error = %g) ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
142 | a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc); | ||
143 | all_good = 0; | ||
144 | } | ||
145 | } | ||
146 | if (all_good) { | ||
147 | SDLTest_AssertCheck(true, "All float additions were correct (%s)", desc); | ||
148 | } | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * Verify element-wise addition of 2 double arrays. | ||
153 | */ | ||
154 | static void verify_doubles_addition(const double *dest, const double *a, const double *b, size_t size, const char *desc) { | ||
155 | size_t i; | ||
156 | int all_good = 1; | ||
157 | |||
158 | for (i = 0; i < size; ++i) { | ||
159 | double expected = a[i] + b[i]; | ||
160 | double abs_error = SDL_fabs(dest[i] - expected); | ||
161 | if (abs_error > 1.0e-5) { | ||
162 | SDLTest_AssertCheck(abs_error < 1.0e-5f, "%g + %g = %g, expected %g (error = %g) ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
163 | a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc); | ||
164 | all_good = false; | ||
165 | } | ||
166 | } | ||
167 | if (all_good) { | ||
168 | SDLTest_AssertCheck(true, "All double additions were correct (%s)", desc); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | /* Intrinsic kernels */ | ||
173 | |||
174 | static void kernel_uints_add_cpu(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
175 | for (; size; --size, ++dest, ++a, ++b) { | ||
176 | *dest = *a + *b; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | static void kernel_uints_mul_cpu(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
181 | for (; size; --size, ++dest, ++a, ++b) { | ||
182 | *dest = *a * *b; | ||
183 | } | ||
184 | } | ||
185 | |||
/* Scalar reference kernel: dest[k] = a[k] + b[k] for k in [0, size). */
static void kernel_floats_add_cpu(float *dest, const float *a, const float *b, size_t size)
{
    size_t k;

    for (k = 0; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
191 | |||
/* Scalar reference kernel: dest[k] = a[k] + b[k] for k in [0, size). */
static void kernel_doubles_add_cpu(double *dest, const double *a, const double *b, size_t size)
{
    size_t k;

    for (k = 0; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
197 | |||
#ifdef SDL_MMX_INTRINSICS
/*
 * MMX kernel: adds two Uint32 lanes per iteration with _mm_add_pi32, then
 * handles an odd trailing element in scalar code. _mm_empty() is called
 * once at the end, after all MMX work is done.
 */
SDL_TARGETING("mmx") static void kernel_uints_add_mmx(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 2; k += 2) {
        const __m64 lhs = *(__m64 *)(a + k);
        const __m64 rhs = *(__m64 *)(b + k);

        *(__m64 *)(dest + k) = _mm_add_pi32(lhs, rhs);
    }
    if (k < size) {
        dest[k] = a[k] + b[k];
    }
    _mm_empty();
}
#endif
209 | |||
#ifdef SDL_SSE_INTRINSICS
/*
 * SSE kernel: adds four floats per iteration using unaligned loads and
 * stores, then finishes the remaining (< 4) elements in scalar code.
 */
SDL_TARGETING("sse") static void kernel_floats_add_sse(float *dest, const float *a, const float *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 4; k += 4) {
        const __m128 lhs = _mm_loadu_ps(a + k);
        const __m128 rhs = _mm_loadu_ps(b + k);

        _mm_storeu_ps(dest + k, _mm_add_ps(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
#endif
220 | |||
#ifdef SDL_SSE2_INTRINSICS
/*
 * SSE2 kernel: adds two doubles per iteration using unaligned loads and
 * stores, then handles an odd trailing element in scalar code.
 */
SDL_TARGETING("sse2") static void kernel_doubles_add_sse2(double *dest, const double *a, const double *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 2; k += 2) {
        const __m128d lhs = _mm_loadu_pd(a + k);
        const __m128d rhs = _mm_loadu_pd(b + k);

        _mm_storeu_pd(dest + k, _mm_add_pd(lhs, rhs));
    }
    if (k < size) {
        dest[k] = a[k] + b[k];
    }
}
#endif
231 | |||
#ifdef SDL_SSE3_INTRINSICS
/*
 * SSE3 kernel: loads four Uint32 lanes per operand with _mm_lddqu_si128,
 * adds them with _mm_add_epi32 and stores the result unaligned; the
 * remaining (< 4) elements are added in scalar code.
 */
SDL_TARGETING("sse3") static void kernel_uints_add_sse3(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 4; k += 4) {
        const __m128i lhs = _mm_lddqu_si128((__m128i *)(a + k));
        const __m128i rhs = _mm_lddqu_si128((__m128i *)(b + k));

        _mm_storeu_si128((__m128i *)(dest + k), _mm_add_epi32(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
#endif
242 | |||
#ifdef SDL_SSE4_1_INTRINSICS
/*
 * SSE4.1 kernel: multiplies four Uint32 lanes per iteration with
 * _mm_mullo_epi32 (operands loaded via _mm_lddqu_si128, result stored
 * unaligned); the remaining (< 4) elements are multiplied in scalar code.
 */
SDL_TARGETING("sse4.1") static void kernel_uints_mul_sse4_1(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 4; k += 4) {
        const __m128i lhs = _mm_lddqu_si128((__m128i *)(a + k));
        const __m128i rhs = _mm_lddqu_si128((__m128i *)(b + k));

        _mm_storeu_si128((__m128i *)(dest + k), _mm_mullo_epi32(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] * b[k];
    }
}
#endif
253 | |||
#ifdef SDL_SSE4_2_INTRINSICS
/**
 * Compute a CRC32-C checksum of a NUL-terminated string with the SSE4.2
 * crc32 intrinsics.
 *
 * The accumulator starts at ~0u and the final value is bit-inverted before
 * it is returned. The input is consumed in the widest chunks available:
 * 8 bytes at a time on x86-64 builds, otherwise 4 bytes, followed by at
 * most one 2-byte and one 1-byte step for the tail.
 *
 * Multi-byte chunks are copied out of the string with SDL_memcpy() instead
 * of dereferencing a casted pointer: the string carries no alignment
 * guarantee for wider types, and reading char data through an integer
 * pointer violates the aliasing rules.
 *
 * \param text  NUL-terminated input; its length is taken with SDL_strlen().
 * \returns the checksum of the bytes preceding the terminator.
 */
SDL_TARGETING("sse4.2") static Uint32 calculate_crc32c_sse4_2(const char *text) {
    Uint32 crc32c = ~0u;
    size_t len = SDL_strlen(text);

#if defined(__x86_64__) || defined(_M_X64)
    for (; len >= 8; len -= 8, text += 8) {
        Uint64 chunk64;
        SDL_memcpy(&chunk64, text, sizeof(chunk64));
        crc32c = (Uint32)_mm_crc32_u64(crc32c, chunk64);
    }
    if (len >= 4) {
        Uint32 chunk32;
        SDL_memcpy(&chunk32, text, sizeof(chunk32));
        crc32c = (Uint32)_mm_crc32_u32(crc32c, chunk32);
        len -= 4;
        text += 4;
    }
#else
    for (; len >= 4; len -= 4, text += 4) {
        Uint32 chunk32;
        SDL_memcpy(&chunk32, text, sizeof(chunk32));
        crc32c = (Uint32)_mm_crc32_u32(crc32c, chunk32);
    }
#endif
    if (len >= 2) {
        Uint16 chunk16;
        SDL_memcpy(&chunk16, text, sizeof(chunk16));
        crc32c = (Uint32)_mm_crc32_u16(crc32c, chunk16);
        len -= 2;
        text += 2;
    }
    if (len) {
        crc32c = (Uint32)_mm_crc32_u8(crc32c, (Uint8)*text);
    }
    return ~crc32c;
}
#endif
284 | |||
#ifdef SDL_AVX_INTRINSICS
/*
 * AVX kernel: adds eight floats per iteration using unaligned 256-bit loads
 * and stores, then finishes the remaining (< 8) elements in scalar code.
 */
SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float *a, const float *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 8; k += 8) {
        const __m256 lhs = _mm256_loadu_ps(a + k);
        const __m256 rhs = _mm256_loadu_ps(b + k);

        _mm256_storeu_ps(dest + k, _mm256_add_ps(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
#endif
295 | |||
#ifdef SDL_AVX2_INTRINSICS
/*
 * AVX2 kernel: adds eight Uint32 lanes per iteration with _mm256_add_epi32
 * (unaligned 256-bit loads and stores), then finishes the remaining (< 8)
 * elements in scalar code.
 */
SDL_TARGETING("avx2") static void kernel_uints_add_avx2(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 8; k += 8) {
        const __m256i lhs = _mm256_loadu_si256((__m256i *)(a + k));
        const __m256i rhs = _mm256_loadu_si256((__m256i *)(b + k));

        _mm256_storeu_si256((__m256i *)(dest + k), _mm256_add_epi32(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
#endif
306 | |||
#ifdef SDL_AVX512F_INTRINSICS
/*
 * AVX-512F kernel: adds sixteen floats per iteration using unaligned
 * 512-bit loads and stores, then finishes the remaining (< 16) elements in
 * scalar code.
 */
SDL_TARGETING("avx512f") static void kernel_floats_add_avx512f(float *dest, const float *a, const float *b, size_t size)
{
    size_t k;

    for (k = 0; size - k >= 16; k += 16) {
        const __m512 lhs = _mm512_loadu_ps(a + k);
        const __m512 rhs = _mm512_loadu_ps(b + k);

        _mm512_storeu_ps(dest + k, _mm512_add_ps(lhs, rhs));
    }
    for (; k < size; ++k) {
        dest[k] = a[k] + b[k];
    }
}
#endif
317 | |||
318 | /* Test case functions */ | ||
319 | |||
320 | static int SDLCALL intrinsics_selftest(void *arg) | ||
321 | { | ||
322 | { | ||
323 | size_t size; | ||
324 | Uint32 *dest, *a, *b; | ||
325 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
326 | free_arrays(dest, a, b); | ||
327 | return TEST_ABORTED; | ||
328 | } | ||
329 | kernel_uints_mul_cpu(dest, a, b, size); | ||
330 | verify_uints_multiplication(dest, a, b, size, "CPU"); | ||
331 | free_arrays(dest, a, b); | ||
332 | } | ||
333 | { | ||
334 | size_t size; | ||
335 | Uint32 *dest, *a, *b; | ||
336 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
337 | free_arrays(dest, a, b); | ||
338 | return TEST_ABORTED; | ||
339 | } | ||
340 | kernel_uints_add_cpu(dest, a, b, size); | ||
341 | verify_uints_addition(dest, a, b, size, "CPU"); | ||
342 | free_arrays(dest, a, b); | ||
343 | } | ||
344 | { | ||
345 | size_t size; | ||
346 | float *dest, *a, *b; | ||
347 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
348 | free_arrays(dest, a, b); | ||
349 | return TEST_ABORTED; | ||
350 | } | ||
351 | kernel_floats_add_cpu(dest, a, b, size); | ||
352 | verify_floats_addition(dest, a, b, size, "CPU"); | ||
353 | free_arrays(dest, a, b); | ||
354 | } | ||
355 | { | ||
356 | size_t size; | ||
357 | double *dest, *a, *b; | ||
358 | if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) { | ||
359 | free_arrays(dest, a, b); | ||
360 | return TEST_ABORTED; | ||
361 | } | ||
362 | kernel_doubles_add_cpu(dest, a, b, size); | ||
363 | verify_doubles_addition(dest, a, b, size, "CPU"); | ||
364 | free_arrays(dest, a, b); | ||
365 | } | ||
366 | return TEST_COMPLETED; | ||
367 | } | ||
368 | |||
369 | static int SDLCALL intrinsics_testMMX(void *arg) | ||
370 | { | ||
371 | if (SDL_HasMMX()) { | ||
372 | SDLTest_AssertCheck(true, "CPU of test machine has MMX support."); | ||
373 | #ifdef SDL_MMX_INTRINSICS | ||
374 | { | ||
375 | size_t size; | ||
376 | Uint32 *dest, *a, *b; | ||
377 | |||
378 | SDLTest_AssertCheck(true, "Test executable uses MMX intrinsics."); | ||
379 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
380 | free_arrays(dest, a, b); | ||
381 | return TEST_ABORTED; | ||
382 | } | ||
383 | kernel_uints_add_mmx(dest, a, b, size); | ||
384 | verify_uints_addition(dest, a, b, size, "MMX"); | ||
385 | free_arrays(dest, a, b); | ||
386 | |||
387 | return TEST_COMPLETED; | ||
388 | } | ||
389 | #else | ||
390 | SDLTest_AssertCheck(true, "Test executable does NOT use MMX intrinsics."); | ||
391 | #endif | ||
392 | } else { | ||
393 | SDLTest_AssertCheck(true, "CPU of test machine has NO MMX support."); | ||
394 | } | ||
395 | return TEST_SKIPPED; | ||
396 | } | ||
397 | |||
398 | static int SDLCALL intrinsics_testSSE(void *arg) | ||
399 | { | ||
400 | if (SDL_HasSSE()) { | ||
401 | SDLTest_AssertCheck(true, "CPU of test machine has SSE support."); | ||
402 | #ifdef SDL_SSE_INTRINSICS | ||
403 | { | ||
404 | size_t size; | ||
405 | float *dest, *a, *b; | ||
406 | |||
407 | SDLTest_AssertCheck(true, "Test executable uses SSE intrinsics."); | ||
408 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
409 | free_arrays(dest, a, b); | ||
410 | return TEST_ABORTED; | ||
411 | } | ||
412 | kernel_floats_add_sse(dest, a, b, size); | ||
413 | verify_floats_addition(dest, a, b, size, "SSE"); | ||
414 | free_arrays(dest, a, b); | ||
415 | |||
416 | return TEST_COMPLETED; | ||
417 | } | ||
418 | #else | ||
419 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE intrinsics."); | ||
420 | #endif | ||
421 | } else { | ||
422 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE support."); | ||
423 | } | ||
424 | return TEST_SKIPPED; | ||
425 | } | ||
426 | |||
427 | static int SDLCALL intrinsics_testSSE2(void *arg) | ||
428 | { | ||
429 | if (SDL_HasSSE2()) { | ||
430 | SDLTest_AssertCheck(true, "CPU of test machine has SSE2 support."); | ||
431 | #ifdef SDL_SSE2_INTRINSICS | ||
432 | { | ||
433 | size_t size; | ||
434 | double *dest, *a, *b; | ||
435 | |||
436 | SDLTest_AssertCheck(true, "Test executable uses SSE2 intrinsics."); | ||
437 | if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) { | ||
438 | free_arrays(dest, a, b); | ||
439 | return TEST_ABORTED; | ||
440 | } | ||
441 | kernel_doubles_add_sse2(dest, a, b, size); | ||
442 | verify_doubles_addition(dest, a, b, size, "SSE2"); | ||
443 | free_arrays(dest, a, b); | ||
444 | |||
445 | return TEST_COMPLETED; | ||
446 | } | ||
447 | #else | ||
448 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE2 intrinsics."); | ||
449 | #endif | ||
450 | } else { | ||
451 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE2 support."); | ||
452 | } | ||
453 | return TEST_SKIPPED; | ||
454 | } | ||
455 | |||
456 | static int SDLCALL intrinsics_testSSE3(void *arg) | ||
457 | { | ||
458 | if (SDL_HasSSE3()) { | ||
459 | SDLTest_AssertCheck(true, "CPU of test machine has SSE3 support."); | ||
460 | #ifdef SDL_SSE3_INTRINSICS | ||
461 | { | ||
462 | size_t size; | ||
463 | Uint32 *dest, *a, *b; | ||
464 | |||
465 | SDLTest_AssertCheck(true, "Test executable uses SSE3 intrinsics."); | ||
466 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
467 | free_arrays(dest, a, b); | ||
468 | return TEST_ABORTED; | ||
469 | } | ||
470 | kernel_uints_add_sse3(dest, a, b, size); | ||
471 | verify_uints_addition(dest, a, b, size, "SSE3"); | ||
472 | free_arrays(dest, a, b); | ||
473 | |||
474 | return TEST_COMPLETED; | ||
475 | } | ||
476 | #else | ||
477 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE3 intrinsics."); | ||
478 | #endif | ||
479 | } else { | ||
480 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE3 support."); | ||
481 | } | ||
482 | return TEST_SKIPPED; | ||
483 | } | ||
484 | |||
485 | static int SDLCALL intrinsics_testSSE4_1(void *arg) | ||
486 | { | ||
487 | if (SDL_HasSSE41()) { | ||
488 | SDLTest_AssertCheck(true, "CPU of test machine has SSE4.1 support."); | ||
489 | #ifdef SDL_SSE4_1_INTRINSICS | ||
490 | { | ||
491 | size_t size; | ||
492 | Uint32 *dest, *a, *b; | ||
493 | |||
494 | SDLTest_AssertCheck(true, "Test executable uses SSE4.1 intrinsics."); | ||
495 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
496 | free_arrays(dest, a, b); | ||
497 | return TEST_ABORTED; | ||
498 | } | ||
499 | kernel_uints_mul_sse4_1(dest, a, b, size); | ||
500 | verify_uints_multiplication(dest, a, b, size, "SSE4.1"); | ||
501 | free_arrays(dest, a, b); | ||
502 | |||
503 | return TEST_COMPLETED; | ||
504 | } | ||
505 | #else | ||
506 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE4.1 intrinsics."); | ||
507 | #endif | ||
508 | } else { | ||
509 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE4.1 support."); | ||
510 | } | ||
511 | return TEST_SKIPPED; | ||
512 | } | ||
513 | |||
514 | static int SDLCALL intrinsics_testSSE4_2(void *arg) | ||
515 | { | ||
516 | if (SDL_HasSSE42()) { | ||
517 | SDLTest_AssertCheck(true, "CPU of test machine has SSE4.2 support."); | ||
518 | #ifdef SDL_SSE4_2_INTRINSICS | ||
519 | { | ||
520 | struct { | ||
521 | const char *input; | ||
522 | Uint32 crc32c; | ||
523 | } references[] = { | ||
524 | {"", 0x00000000}, | ||
525 | {"Hello world", 0x72b51f78}, | ||
526 | {"Simple DirectMedia Layer", 0x56f85341, }, | ||
527 | }; | ||
528 | size_t i; | ||
529 | |||
530 | SDLTest_AssertCheck(true, "Test executable uses SSE4.2 intrinsics."); | ||
531 | |||
532 | for (i = 0; i < SDL_arraysize(references); ++i) { | ||
533 | Uint32 actual = calculate_crc32c_sse4_2(references[i].input); | ||
534 | SDLTest_AssertCheck(actual == references[i].crc32c, "CRC32-C(\"%s\")=0x%08x, got 0x%08x", | ||
535 | references[i].input, references[i].crc32c, actual); | ||
536 | } | ||
537 | |||
538 | return TEST_COMPLETED; | ||
539 | } | ||
540 | #else | ||
541 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE4.2 intrinsics."); | ||
542 | #endif | ||
543 | } else { | ||
544 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE4.2 support."); | ||
545 | } | ||
546 | return TEST_SKIPPED; | ||
547 | } | ||
548 | |||
549 | static int SDLCALL intrinsics_testAVX(void *arg) | ||
550 | { | ||
551 | if (SDL_HasAVX()) { | ||
552 | SDLTest_AssertCheck(true, "CPU of test machine has AVX support."); | ||
553 | #ifdef SDL_AVX_INTRINSICS | ||
554 | { | ||
555 | size_t size; | ||
556 | float *dest, *a, *b; | ||
557 | |||
558 | SDLTest_AssertCheck(true, "Test executable uses AVX intrinsics."); | ||
559 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
560 | free_arrays(dest, a, b); | ||
561 | return TEST_ABORTED; | ||
562 | } | ||
563 | kernel_floats_add_avx(dest, a, b, size); | ||
564 | verify_floats_addition(dest, a, b, size, "AVX"); | ||
565 | free_arrays(dest, a, b); | ||
566 | |||
567 | return TEST_COMPLETED; | ||
568 | } | ||
569 | #else | ||
570 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX intrinsics."); | ||
571 | #endif | ||
572 | } else { | ||
573 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX support."); | ||
574 | } | ||
575 | return TEST_SKIPPED; | ||
576 | } | ||
577 | |||
578 | static int SDLCALL intrinsics_testAVX2(void *arg) | ||
579 | { | ||
580 | if (SDL_HasAVX2()) { | ||
581 | SDLTest_AssertCheck(true, "CPU of test machine has AVX2 support."); | ||
582 | #ifdef SDL_AVX2_INTRINSICS | ||
583 | { | ||
584 | size_t size; | ||
585 | Uint32 *dest, *a, *b; | ||
586 | |||
587 | SDLTest_AssertCheck(true, "Test executable uses AVX2 intrinsics."); | ||
588 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
589 | free_arrays(dest, a, b); | ||
590 | return TEST_ABORTED; | ||
591 | } | ||
592 | kernel_uints_add_avx2(dest, a, b, size); | ||
593 | verify_uints_addition(dest, a, b, size, "AVX2"); | ||
594 | free_arrays(dest, a, b); | ||
595 | |||
596 | return TEST_COMPLETED; | ||
597 | } | ||
598 | #else | ||
599 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX2 intrinsics."); | ||
600 | #endif | ||
601 | } else { | ||
602 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX2 support."); | ||
603 | } | ||
604 | return TEST_SKIPPED; | ||
605 | } | ||
606 | |||
607 | static int SDLCALL intrinsics_testAVX512F(void *arg) | ||
608 | { | ||
609 | if (SDL_HasAVX512F()) { | ||
610 | SDLTest_AssertCheck(true, "CPU of test machine has AVX512F support."); | ||
611 | #ifdef SDL_AVX512F_INTRINSICS | ||
612 | { | ||
613 | size_t size; | ||
614 | float *dest, *a, *b; | ||
615 | |||
616 | SDLTest_AssertCheck(true, "Test executable uses AVX512F intrinsics."); | ||
617 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
618 | free_arrays(dest, a, b); | ||
619 | return TEST_ABORTED; | ||
620 | } | ||
621 | kernel_floats_add_avx512f(dest, a, b, size); | ||
622 | verify_floats_addition(dest, a, b, size, "AVX512F"); | ||
623 | free_arrays(dest, a, b); | ||
624 | |||
625 | return TEST_COMPLETED; | ||
626 | } | ||
627 | #else | ||
628 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX512F intrinsics."); | ||
629 | #endif | ||
630 | } else { | ||
631 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX512F support."); | ||
632 | } | ||
633 | |||
634 | return TEST_SKIPPED; | ||
635 | } | ||
636 | |||
637 | /* ================= Test References ================== */ | ||
638 | |||
639 | /* Intrinsics test cases */ | ||
640 | |||
641 | static const SDLTest_TestCaseReference intrinsicsTest1 = { | ||
642 | intrinsics_selftest, "intrinsics_selftest", "Intrinsics testautomation selftest", TEST_ENABLED | ||
643 | }; | ||
644 | |||
645 | static const SDLTest_TestCaseReference intrinsicsTest2 = { | ||
646 | intrinsics_testMMX, "intrinsics_testMMX", "Tests MMX intrinsics", TEST_ENABLED | ||
647 | }; | ||
648 | |||
649 | static const SDLTest_TestCaseReference intrinsicsTest3 = { | ||
650 | intrinsics_testSSE, "intrinsics_testSSE", "Tests SSE intrinsics", TEST_ENABLED | ||
651 | }; | ||
652 | |||
653 | static const SDLTest_TestCaseReference intrinsicsTest4 = { | ||
654 | intrinsics_testSSE2, "intrinsics_testSSE2", "Tests SSE2 intrinsics", TEST_ENABLED | ||
655 | }; | ||
656 | |||
657 | static const SDLTest_TestCaseReference intrinsicsTest5 = { | ||
658 | intrinsics_testSSE3, "intrinsics_testSSE3", "Tests SSE3 intrinsics", TEST_ENABLED | ||
659 | }; | ||
660 | |||
661 | static const SDLTest_TestCaseReference intrinsicsTest6 = { | ||
662 | intrinsics_testSSE4_1, "intrinsics_testSSE4.1", "Tests SSE4.1 intrinsics", TEST_ENABLED | ||
663 | }; | ||
664 | |||
665 | static const SDLTest_TestCaseReference intrinsicsTest7 = { | ||
666 | intrinsics_testSSE4_2, "intrinsics_testSSE4.2", "Tests SSE4.2 intrinsics", TEST_ENABLED | ||
667 | }; | ||
668 | |||
669 | static const SDLTest_TestCaseReference intrinsicsTest8 = { | ||
670 | intrinsics_testAVX, "intrinsics_testAVX", "Tests AVX intrinsics", TEST_ENABLED | ||
671 | }; | ||
672 | |||
673 | static const SDLTest_TestCaseReference intrinsicsTest9 = { | ||
674 | intrinsics_testAVX2, "intrinsics_testAVX2", "Tests AVX2 intrinsics", TEST_ENABLED | ||
675 | }; | ||
676 | |||
677 | static const SDLTest_TestCaseReference intrinsicsTest10 = { | ||
678 | intrinsics_testAVX512F, "intrinsics_testAVX512F", "Tests AVX512F intrinsics", TEST_ENABLED | ||
679 | }; | ||
680 | |||
681 | /* Sequence of Platform test cases */ | ||
682 | static const SDLTest_TestCaseReference *platformTests[] = { | ||
683 | &intrinsicsTest1, | ||
684 | &intrinsicsTest2, | ||
685 | &intrinsicsTest3, | ||
686 | &intrinsicsTest4, | ||
687 | &intrinsicsTest5, | ||
688 | &intrinsicsTest6, | ||
689 | &intrinsicsTest7, | ||
690 | &intrinsicsTest8, | ||
691 | &intrinsicsTest9, | ||
692 | &intrinsicsTest10, | ||
693 | NULL | ||
694 | }; | ||
695 | |||
696 | /* Platform test suite (global) */ | ||
697 | SDLTest_TestSuiteReference intrinsicsTestSuite = { | ||
698 | "Intrinsics", | ||
699 | NULL, | ||
700 | platformTests, | ||
701 | NULL | ||
702 | }; | ||