trast · December 17, 2015 10:58
diff --git a/gistfile1.c b/gistfile1.c
 #include <stdio.h>
 #include <float.h>
 #include <assert.h>
 #include <fenv.h>
 #include <math.h>

 /*
 * Yay C template programming!
 *
 * The problem here is that the C++ support for the vector types is
 * not complete as of GCC 4.7/4.8.  It is slightly more complete for
 * C, so we use pure C here, at the cost of silly contortions.
 */

 #define deftype_1(T, width, V) typedef T V __attribute__ ((vector_size (width)));
 #define deftype(typ, width) deftype_1(typ, width, v##width##typ)

 deftype(float, 8)
 deftype(float, 16)
 deftype(float, 32)
 deftype(double, 16)
 deftype(double, 32)

 v8float sqrt_v8float(v8float x) {
 	v8float r;
 	int i;
 	for (i = 0; i < 2; i++)
 		r[i] = sqrtf(x[i]);
 	return r;
 }
 v16float sqrt_v16float(v16float x) {
 	v16float r;
 #ifdef __SSE__
 	__asm__ __volatile__ ("SQRTPS %0, %1" : "=x"(r) : "xm"(x));
 #else
 	int i;
 	for (i = 0; i < 4; i++)
 		r[i] = sqrtf(x[i]);
 #endif
 	return r;
 }
 v32float sqrt_v32float(v32float x) {
 	v32float r;
 #ifdef __AVX__
 	__asm__ __volatile__ ("VSQRTPS %0, %1" : "=x"(r) : "xm"(x));
 #else
 	int i;
 	for (i = 0; i < 8; i++)
 		r[i] = sqrtf(x[i]);
 #endif
 	return r;
 }
 v16double sqrt_v16double(v16double x) {
 	v16double r;
 #ifdef __SSE__
 	__asm__ __volatile__ ("SQRTPD %0, %1" : "=x"(r) : "xm"(x));
 #else
 	int i;
 	for (i = 0; i < 2; i++)
 		r[i] = sqrt(x[i]);
 #endif
 	return r;
 }
 v32double sqrt_v32double(v32double x) {
 	v32double r;
 #ifdef __AVX__
 	__asm__ __volatile__ ("VSQRTPD %0, %1" : "=x"(r) : "xm"(x));
 #else
 	int i;
 	for (i = 0; i < 4; i++)
 		r[i] = sqrt(x[i]);
 #endif
 	return r;
 }

 #define definetest_1(T, width, V, epsilon)				\
 									\
 	void printvec_##V(int n, const char *prefix, V vec)		\
 	{								\
 		int i;							\
 		printf("%s =", prefix);					\
 		for (i = 0; i < n; i++)					\
 			printf(" %a", vec[i]);				\
 		putchar('\n');						\
 	}								\
 									\
 	void dotest_##V(int roundingmode, const char *desc)		\
 	{								\
 		const int n = width / sizeof(T);			\
 		int i;							\
 		V eps;							\
 		for (i = 0; i < n; i++) {				\
 			eps[i] = epsilon;				\
 		}							\
 		V eps14 = eps/4.f;					\
 		V eps12 = eps/2.f;					\
 		V eps34 = 3.f*eps/4.f;					\
 		V sqrteps = sqrt_##V(eps);				\
 		V sqrteps14 = sqrt_##V(eps14);				\
 		fesetround(roundingmode);				\
 		printf("-- %s --\n", desc);				\
 		printvec_##V(n, "eps = ", eps);				\
 		printvec_##V(n, "eps14 = ", eps14);			\
 		printvec_##V(n, "eps12 = ", eps12);			\
 		printvec_##V(n, "eps34 = ", eps34);			\
 		printvec_##V(n, "sqrteps = ", sqrteps);			\
 		printvec_##V(n, "sqrteps14 = ", sqrteps14);		\
 		printvec_##V(n, "1 + eps = ", 1.f + eps);		\
 		printvec_##V(n, "1 - eps = ", 1.f - eps);		\
 		printvec_##V(n, "1 + eps14 = ", 1.f + eps14);		\
 		printvec_##V(n, "1 - eps14 = ", 1.f - eps14);		\
 		printvec_##V(n, "1 + eps12 = ", 1.f + eps12);		\
 		printvec_##V(n, "1 - eps12 = ", 1.f - eps12);		\
 		printvec_##V(n, "1 + eps34 = ", 1.f + eps34);		\
 		printvec_##V(n, "1 - eps34 = ", 1.f - eps34);		\
 		printvec_##V(n, "-1 + eps = ", -1.f + eps);		\
 		printvec_##V(n, "-1 - eps = ", -1.f - eps);		\
 		printvec_##V(n, "-1 + eps14 = ", -1.f + eps14);		\
 		printvec_##V(n, "-1 - eps14 = ", -1.f - eps14);		\
 		printvec_##V(n, "-1 + eps12 = ", -1.f + eps12);		\
 		printvec_##V(n, "-1 - eps12 = ", -1.f - eps12);		\
 		printvec_##V(n, "-1 + eps34 = ", -1.f + eps34);		\
 		printvec_##V(n, "-1 - eps34 = ", -1.f - eps34);		\
 		printvec_##V(n, "(1 + sqrteps)^2 = ", (1+sqrteps)*(1+sqrteps)); \
 		printvec_##V(n, "(1 + sqrteps14)^2 = ", (1+sqrteps14)*(1+sqrteps14)); \
 		printvec_##V(n, "1 / (1+eps) = ", 1.f / (1+eps));	\
 	}

 #define definetest(typ, width, epsilon) definetest_1(typ, width, v##width##typ, epsilon)

 definetest(float, 8, FLT_EPSILON)
 definetest(float, 16, FLT_EPSILON)
 definetest(float, 32, FLT_EPSILON)
 definetest(double, 16, DBL_EPSILON)
 definetest(double, 32, DBL_EPSILON)

 #define dotest(mode, typ, width) dotest_v##width##typ(mode, #mode " " #width " " #typ)

 int main ()
 {
 	dotest(FE_TONEAREST, float, 8);
 	dotest(FE_TONEAREST, float, 16);
 	dotest(FE_TONEAREST, float, 32);
 	dotest(FE_TONEAREST, double, 16);
 	dotest(FE_TONEAREST, double, 32);

 	dotest(FE_UPWARD, float, 8);
 	dotest(FE_UPWARD, float, 16);
 	dotest(FE_UPWARD, float, 32);
 	dotest(FE_UPWARD, double, 16);
 	dotest(FE_UPWARD, double, 32);

 	dotest(FE_DOWNWARD, float, 8);
 	dotest(FE_DOWNWARD, float, 16);
 	dotest(FE_DOWNWARD, float, 32);
 	dotest(FE_DOWNWARD, double, 16);
 	dotest(FE_DOWNWARD, double, 32);

 	dotest(FE_TOWARDZERO, float, 8);
 	dotest(FE_TOWARDZERO, float, 16);
 	dotest(FE_TOWARDZERO, float, 32);
 	dotest(FE_TOWARDZERO, double, 16);
 	dotest(FE_TOWARDZERO, double, 32);

 	return 0;
 }
	#include <stdio.h>
	#include <float.h>
	#include <assert.h>
	#include <fenv.h>
	#include <math.h>

	/*
	* Yay C template programming!
	*
	* The problem here is that the C++ support for the vector types is
	* not complete as of GCC 4.7/4.8. It is slightly more complete for
	* C, so we use pure C here, at the cost of silly contortions.
	*/

	#define deftype_1(T, width, V) typedef T V __attribute__ ((vector_size (width)));
	#define deftype(typ, width) deftype_1(typ, width, v##width##typ)

	deftype(float, 8)
	deftype(float, 16)
	deftype(float, 32)
	deftype(double, 16)
	deftype(double, 32)

	v8float sqrt_v8float(v8float x) {
	v8float r;
	int i;
	for (i = 0; i < 2; i++)
	r[i] = sqrtf(x[i]);
	return r;
	}
	v16float sqrt_v16float(v16float x) {
	v16float r;
	#ifdef __SSE__
	__asm__ __volatile__ ("SQRTPS %0, %1" : "=x"(r) : "xm"(x));
	#else
	int i;
	for (i = 0; i < 4; i++)
	r[i] = sqrtf(x[i]);
	#endif
	return r;
	}
	v32float sqrt_v32float(v32float x) {
	v32float r;
	#ifdef __AVX__
	__asm__ __volatile__ ("VSQRTPS %0, %1" : "=x"(r) : "xm"(x));
	#else
	int i;
	for (i = 0; i < 8; i++)
	r[i] = sqrtf(x[i]);
	#endif
	return r;
	}
	v16double sqrt_v16double(v16double x) {
	v16double r;
	#ifdef __SSE__
	__asm__ __volatile__ ("SQRTPD %0, %1" : "=x"(r) : "xm"(x));
	#else
	int i;
	for (i = 0; i < 2; i++)
	r[i] = sqrt(x[i]);
	#endif
	return r;
	}
	v32double sqrt_v32double(v32double x) {
	v32double r;
	#ifdef __AVX__
	__asm__ __volatile__ ("VSQRTPD %0, %1" : "=x"(r) : "xm"(x));
	#else
	int i;
	for (i = 0; i < 4; i++)
	r[i] = sqrt(x[i]);
	#endif
	return r;
	}

	#define definetest_1(T, width, V, epsilon) \
	\
	void printvec_##V(int n, const char *prefix, V vec) \
	{ \
	int i; \
	printf("%s =", prefix); \
	for (i = 0; i < n; i++) \
	printf(" %a", vec[i]); \
	putchar('\n'); \
	} \
	\
	void dotest_##V(int roundingmode, const char *desc) \
	{ \
	const int n = width / sizeof(T); \
	int i; \
	V eps; \
	for (i = 0; i < n; i++) { \
	eps[i] = epsilon; \
	} \
	V eps14 = eps/4.f; \
	V eps12 = eps/2.f; \
	V eps34 = 3.f*eps/4.f; \
	V sqrteps = sqrt_##V(eps); \
	V sqrteps14 = sqrt_##V(eps14); \
	fesetround(roundingmode); \
	printf("-- %s --\n", desc); \
	printvec_##V(n, "eps = ", eps); \
	printvec_##V(n, "eps14 = ", eps14); \
	printvec_##V(n, "eps12 = ", eps12); \
	printvec_##V(n, "eps34 = ", eps34); \
	printvec_##V(n, "sqrteps = ", sqrteps); \
	printvec_##V(n, "sqrteps14 = ", sqrteps14); \
	printvec_##V(n, "1 + eps = ", 1.f + eps); \
	printvec_##V(n, "1 - eps = ", 1.f - eps); \
	printvec_##V(n, "1 + eps14 = ", 1.f + eps14); \
	printvec_##V(n, "1 - eps14 = ", 1.f - eps14); \
	printvec_##V(n, "1 + eps12 = ", 1.f + eps12); \
	printvec_##V(n, "1 - eps12 = ", 1.f - eps12); \
	printvec_##V(n, "1 + eps34 = ", 1.f + eps34); \
	printvec_##V(n, "1 - eps34 = ", 1.f - eps34); \
	printvec_##V(n, "-1 + eps = ", -1.f + eps); \
	printvec_##V(n, "-1 - eps = ", -1.f - eps); \
	printvec_##V(n, "-1 + eps14 = ", -1.f + eps14); \
	printvec_##V(n, "-1 - eps14 = ", -1.f - eps14); \
	printvec_##V(n, "-1 + eps12 = ", -1.f + eps12); \
	printvec_##V(n, "-1 - eps12 = ", -1.f - eps12); \
	printvec_##V(n, "-1 + eps34 = ", -1.f + eps34); \
	printvec_##V(n, "-1 - eps34 = ", -1.f - eps34); \
	printvec_##V(n, "(1 + sqrteps)^2 = ", (1+sqrteps)*(1+sqrteps)); \
	printvec_##V(n, "(1 + sqrteps14)^2 = ", (1+sqrteps14)*(1+sqrteps14)); \
	printvec_##V(n, "1 / (1+eps) = ", 1.f / (1+eps)); \
	}

	#define definetest(typ, width, epsilon) definetest_1(typ, width, v##width##typ, epsilon)

	definetest(float, 8, FLT_EPSILON)
	definetest(float, 16, FLT_EPSILON)
	definetest(float, 32, FLT_EPSILON)
	definetest(double, 16, DBL_EPSILON)
	definetest(double, 32, DBL_EPSILON)

	#define dotest(mode, typ, width) dotest_v##width##typ(mode, #mode " " #width " " #typ)

	int main ()
	{
	dotest(FE_TONEAREST, float, 8);
	dotest(FE_TONEAREST, float, 16);
	dotest(FE_TONEAREST, float, 32);
	dotest(FE_TONEAREST, double, 16);
	dotest(FE_TONEAREST, double, 32);

	dotest(FE_UPWARD, float, 8);
	dotest(FE_UPWARD, float, 16);
	dotest(FE_UPWARD, float, 32);
	dotest(FE_UPWARD, double, 16);
	dotest(FE_UPWARD, double, 32);

	dotest(FE_DOWNWARD, float, 8);
	dotest(FE_DOWNWARD, float, 16);
	dotest(FE_DOWNWARD, float, 32);
	dotest(FE_DOWNWARD, double, 16);
	dotest(FE_DOWNWARD, double, 32);

	dotest(FE_TOWARDZERO, float, 8);
	dotest(FE_TOWARDZERO, float, 16);
	dotest(FE_TOWARDZERO, float, 32);
	dotest(FE_TOWARDZERO, double, 16);
	dotest(FE_TOWARDZERO, double, 32);

	return 0;
	}
No results found