// build with gcc -Ofast -g -lpthread -lblas -lrt openblas-bug478.c or // gcc -Ofast -g -lpthread -lopenblas -lrt openblas-bug478.c // run with OPENBLAS_NUM_THREADS=1 #include #include #include #include #include // number of loop for a 1x1 matrix. Change it if the test is // too slow on you computer. #define NLOOP 20e9 typedef struct { int matrix_size; int n_loop; int threaded; } BenchParam; void * dgemv_bench(BenchParam * param) { int i, n; n = param->n_loop; int size = param->matrix_size; double v = 1.01; int one = 1; double * A = calloc(size*size, sizeof(double)); double * x = calloc(size, sizeof(double)); double * y = calloc(size, sizeof(double)); for(i = 0; i < size; i++) y[i] = i; for(i = 0; i < size * size; i++) A[i] = i; for(i = 0; i < param->n_loop; i++) { dgemv_("N", &size, &size, &v, A, &size, y, &one, &v, y, &one); } if(param->threaded) pthread_exit(NULL); return NULL; } double thread_bench(int nloop, int nb_threads, int matrix_size, double reftime) { BenchParam param; pthread_t threads[nb_threads]; int t, rc; struct timespec tick, tock; param.matrix_size = matrix_size; clock_gettime(CLOCK_MONOTONIC, &tick); param.threaded = 1; for(t=0; t 0) { printf("Nb threads %d, matrix size %d, time %g, speedup %g\n", nb_threads, matrix_size, dt, reftime/dt); return reftime; } else { printf("Nb threads %d, matrix size %d, time %g\n", nb_threads, matrix_size, dt); return dt; } } int main(int argc, char * argv[]) { int i, j; struct timespec tick, tock; int nb_threads[5] = {1, 2, 4, 6, 12}; int matrix_sizes[5] = {20, 40, 60, 80, 200}; for(j = 0; j < 5; j++) { double ms = matrix_sizes[j]; double reftime = -1; for(i = 0; i < 5; i++) reftime = thread_bench((int)(NLOOP/(ms*ms)), nb_threads[i], ms, reftime); puts(""); } }