Last active
December 17, 2015 10:58
-
-
Save trast/5598179 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <float.h> | |
| #include <assert.h> | |
| #include <fenv.h> | |
| #include <math.h> | |
| /* | |
| * Yay C template programming! | |
| * | |
| * The problem here is that the C++ support for the vector types is | |
| * not complete as of GCC 4.7/4.8. It is slightly more complete for | |
| * C, so we use pure C here, at the cost of silly contortions. | |
| */ | |
| #define deftype_1(T, width, V) typedef T V __attribute__ ((vector_size (width))); | |
| #define deftype(typ, width) deftype_1(typ, width, v##width##typ) | |
| deftype(float, 8) | |
| deftype(float, 16) | |
| deftype(float, 32) | |
| deftype(double, 16) | |
| deftype(double, 32) | |
| v8float sqrt_v8float(v8float x) { | |
| v8float r; | |
| int i; | |
| for (i = 0; i < 2; i++) | |
| r[i] = sqrtf(x[i]); | |
| return r; | |
| } | |
| v16float sqrt_v16float(v16float x) { | |
| v16float r; | |
| #ifdef __SSE__ | |
| __asm__ __volatile__ ("SQRTPS %0, %1" : "=x"(r) : "xm"(x)); | |
| #else | |
| int i; | |
| for (i = 0; i < 4; i++) | |
| r[i] = sqrtf(x[i]); | |
| #endif | |
| return r; | |
| } | |
| v32float sqrt_v32float(v32float x) { | |
| v32float r; | |
| #ifdef __AVX__ | |
| __asm__ __volatile__ ("VSQRTPS %0, %1" : "=x"(r) : "xm"(x)); | |
| #else | |
| int i; | |
| for (i = 0; i < 8; i++) | |
| r[i] = sqrtf(x[i]); | |
| #endif | |
| return r; | |
| } | |
| v16double sqrt_v16double(v16double x) { | |
| v16double r; | |
| #ifdef __SSE__ | |
| __asm__ __volatile__ ("SQRTPD %0, %1" : "=x"(r) : "xm"(x)); | |
| #else | |
| int i; | |
| for (i = 0; i < 2; i++) | |
| r[i] = sqrt(x[i]); | |
| #endif | |
| return r; | |
| } | |
| v32double sqrt_v32double(v32double x) { | |
| v32double r; | |
| #ifdef __AVX__ | |
| __asm__ __volatile__ ("VSQRTPD %0, %1" : "=x"(r) : "xm"(x)); | |
| #else | |
| int i; | |
| for (i = 0; i < 4; i++) | |
| r[i] = sqrt(x[i]); | |
| #endif | |
| return r; | |
| } | |
| #define definetest_1(T, width, V, epsilon) \ | |
| \ | |
| void printvec_##V(int n, const char *prefix, V vec) \ | |
| { \ | |
| int i; \ | |
| printf("%s =", prefix); \ | |
| for (i = 0; i < n; i++) \ | |
| printf(" %a", vec[i]); \ | |
| putchar('\n'); \ | |
| } \ | |
| \ | |
| void dotest_##V(int roundingmode, const char *desc) \ | |
| { \ | |
| const int n = width / sizeof(T); \ | |
| int i; \ | |
| V eps; \ | |
| for (i = 0; i < n; i++) { \ | |
| eps[i] = epsilon; \ | |
| } \ | |
| V eps14 = eps/4.f; \ | |
| V eps12 = eps/2.f; \ | |
| V eps34 = 3.f*eps/4.f; \ | |
| V sqrteps = sqrt_##V(eps); \ | |
| V sqrteps14 = sqrt_##V(eps14); \ | |
| fesetround(roundingmode); \ | |
| printf("-- %s --\n", desc); \ | |
| printvec_##V(n, "eps = ", eps); \ | |
| printvec_##V(n, "eps14 = ", eps14); \ | |
| printvec_##V(n, "eps12 = ", eps12); \ | |
| printvec_##V(n, "eps34 = ", eps34); \ | |
| printvec_##V(n, "sqrteps = ", sqrteps); \ | |
| printvec_##V(n, "sqrteps14 = ", sqrteps14); \ | |
| printvec_##V(n, "1 + eps = ", 1.f + eps); \ | |
| printvec_##V(n, "1 - eps = ", 1.f - eps); \ | |
| printvec_##V(n, "1 + eps14 = ", 1.f + eps14); \ | |
| printvec_##V(n, "1 - eps14 = ", 1.f - eps14); \ | |
| printvec_##V(n, "1 + eps12 = ", 1.f + eps12); \ | |
| printvec_##V(n, "1 - eps12 = ", 1.f - eps12); \ | |
| printvec_##V(n, "1 + eps34 = ", 1.f + eps34); \ | |
| printvec_##V(n, "1 - eps34 = ", 1.f - eps34); \ | |
| printvec_##V(n, "-1 + eps = ", -1.f + eps); \ | |
| printvec_##V(n, "-1 - eps = ", -1.f - eps); \ | |
| printvec_##V(n, "-1 + eps14 = ", -1.f + eps14); \ | |
| printvec_##V(n, "-1 - eps14 = ", -1.f - eps14); \ | |
| printvec_##V(n, "-1 + eps12 = ", -1.f + eps12); \ | |
| printvec_##V(n, "-1 - eps12 = ", -1.f - eps12); \ | |
| printvec_##V(n, "-1 + eps34 = ", -1.f + eps34); \ | |
| printvec_##V(n, "-1 - eps34 = ", -1.f - eps34); \ | |
| printvec_##V(n, "(1 + sqrteps)^2 = ", (1+sqrteps)*(1+sqrteps)); \ | |
| printvec_##V(n, "(1 + sqrteps14)^2 = ", (1+sqrteps14)*(1+sqrteps14)); \ | |
| printvec_##V(n, "1 / (1+eps) = ", 1.f / (1+eps)); \ | |
| } | |
| #define definetest(typ, width, epsilon) definetest_1(typ, width, v##width##typ, epsilon) | |
| definetest(float, 8, FLT_EPSILON) | |
| definetest(float, 16, FLT_EPSILON) | |
| definetest(float, 32, FLT_EPSILON) | |
| definetest(double, 16, DBL_EPSILON) | |
| definetest(double, 32, DBL_EPSILON) | |
| #define dotest(mode, typ, width) dotest_v##width##typ(mode, #mode " " #width " " #typ) | |
| int main () | |
| { | |
| dotest(FE_TONEAREST, float, 8); | |
| dotest(FE_TONEAREST, float, 16); | |
| dotest(FE_TONEAREST, float, 32); | |
| dotest(FE_TONEAREST, double, 16); | |
| dotest(FE_TONEAREST, double, 32); | |
| dotest(FE_UPWARD, float, 8); | |
| dotest(FE_UPWARD, float, 16); | |
| dotest(FE_UPWARD, float, 32); | |
| dotest(FE_UPWARD, double, 16); | |
| dotest(FE_UPWARD, double, 32); | |
| dotest(FE_DOWNWARD, float, 8); | |
| dotest(FE_DOWNWARD, float, 16); | |
| dotest(FE_DOWNWARD, float, 32); | |
| dotest(FE_DOWNWARD, double, 16); | |
| dotest(FE_DOWNWARD, double, 32); | |
| dotest(FE_TOWARDZERO, float, 8); | |
| dotest(FE_TOWARDZERO, float, 16); | |
| dotest(FE_TOWARDZERO, float, 32); | |
| dotest(FE_TOWARDZERO, double, 16); | |
| dotest(FE_TOWARDZERO, double, 32); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment