Last active
March 7, 2017 16:24
-
-
Save ChiHsiang/d10a20d0f5eedd25e5d2c1a6da6d2a4e to your computer and use it in GitHub Desktop.
SIMD-avx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #1 | |
/*
 * Variant #1: plain scalar version.
 *
 * Writes the component-wise sum of two 3-element vectors:
 *     out[k] = a[k] + b[k]   for k = 0, 1, 2
 *
 * a, b  - inputs, each read at indices 0..2
 * out   - destination, written at indices 0..2
 */
static inline
void add_vector(const double *a, const double *b, double *out)
{
    /* Same element order as an index loop from 0 to 2. */
    out[0] = a[0] + b[0];
    out[1] = a[1] + b[1];
    out[2] = a[2] + b[2];
}
| ================================================================================================== | |
| #2 | |
| static inline | |
| void add_vector(const double *a, const double *b, double *out) | |
| { | |
| __m256i mask = _mm256_set_epi64x(0x0000000000000000, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff); | |
| __m256d c = _mm256_maskload_pd(a, mask); | |
| __m256d d = _mm256_maskload_pd(b, mask); | |
| __m256d dst = _mm256_add_pd(c, d); | |
| _mm256_maskstore_pd(out, mask, dst); | |
| } | |
| ================================================================================================== | |
| #3 | |
/*
 * Variant #3: SIMD without a mask.
 *
 * Writes the component-wise sum of two 3-element vectors:
 *     out[k] = a[k] + b[k]   for k = 0, 1, 2
 *
 * Exactly three doubles are accessed through each pointer. A full 256-bit
 * load/store covers four doubles, which would read one element past the end
 * of a and b and write one element past the end of out. Lanes 0..1 are
 * therefore handled by one unaligned 128-bit operation and lane 2 as a
 * scalar.
 *
 * a, b  - inputs, each read at indices 0..2; no alignment requirement
 * out   - destination, written at indices 0..2; no alignment requirement
 */
static inline
void add_vector(const double *a, const double *b, double *out)
{
    /* Read every input before storing anything, so out may overlap a or b. */
    const __m128d low_pair = _mm_add_pd(_mm_loadu_pd(a), _mm_loadu_pd(b));
    const double third = a[2] + b[2];

    _mm_storeu_pd(out, low_pair);
    out[2] = third;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment