Created
October 19, 2020 05:08
-
-
Save randompast/742443c9be23b48ab120c78c9f67fd77 to your computer and use it in GitHub Desktop.
Revisions
-
randompast created this gist
Oct 19, 2020. There are no files selected for viewing
# NOTE: recovered from a GitHub gist diff view (hunk header "@@ -0,0 +1,174 @@").
# GitHub flagged the original file as containing hidden or bidirectional
# Unicode text that may be interpreted or compiled differently than it
# appears; review it in an editor that reveals hidden Unicode characters.
"""Compare 1-D first/second/third-order convolutions.

Three implementations are exercised and timed: cuSignal, Numba ``@njit``
on the CPU, and Numba ``@cuda.jit`` kernels on the GPU.
"""
import time

import cusignal as cs
import cupy as cp
import numpy as np
from numba import cuda, njit, jit

# Threads per block used for every CUDA kernel launch in this file.
_THREADS_PER_BLOCK = 128


@njit
def nbconv_1d3o(x, k):
    """Third-order 1-D "valid" convolution of ``x`` with kernel ``k`` (CPU).

    The output length and the index offset are derived from ``k.shape[0]``
    only, so the result is only meaningful when all three dimensions of
    ``k`` have the same length.

    Returns a float64 array of length ``x.size - k.shape[0] + 1``.
    """
    taps = k.shape[0]
    y = np.zeros(x.size - taps + 1)
    for n in range(y.size):
        d = n + taps - 1
        acc = 0.0
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for m in range(k.shape[2]):
                    acc += x[d - i] * x[d - j] * x[d - m] * k[i, j, m]
        y[n] = acc
    return y


@njit
def nbconv_1d2o(x, k):
    """Second-order 1-D "valid" convolution of ``x`` with kernel ``k`` (CPU).

    The output length and the index offset are derived from ``k.shape[0]``
    only, so the result is only meaningful when both dimensions of ``k``
    have the same length.

    Returns a float64 array of length ``x.size - k.shape[0] + 1``.
    """
    taps = k.shape[0]
    y = np.zeros(x.size - taps + 1)
    for n in range(y.size):
        d = n + taps - 1
        acc = 0.0
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                acc += x[d - i] * x[d - j] * k[i, j]
        y[n] = acc
    return y


@njit
def nbconv_1d1o(x, k):
    """First-order (ordinary) 1-D "valid" convolution of ``x`` with ``k`` (CPU).

    Returns a float64 array of length ``x.size - k.shape[0] + 1``.
    """
    taps = k.shape[0]
    y = np.zeros(x.size - taps + 1)
    for n in range(y.size):
        d = n + taps - 1
        acc = 0.0
        for i in range(taps):
            acc += x[d - i] * k[i]
        y[n] = acc
    return y


@cuda.jit
def nbconv_1d3o_device(x, k, y):
    """CUDA kernel: third-order 1-D convolution, one thread per output sample.

    The contribution for sample ``n`` is added onto ``y[n]``, so callers
    pass a zero-filled ``y``. The sum is built in a local variable and
    stored once, instead of updating ``y[n]`` on every inner iteration.
    """
    n = cuda.grid(1)
    if 0 <= n < y.size:
        d = n + k.shape[0] - 1
        acc = 0.0
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for m in range(k.shape[2]):
                    acc += x[d - i] * x[d - j] * x[d - m] * k[i, j, m]
        y[n] += acc


@cuda.jit
def nbconv_1d2o_device(x, k, y):
    """CUDA kernel: second-order 1-D convolution, one thread per output sample.

    The contribution for sample ``n`` is added onto ``y[n]``, so callers
    pass a zero-filled ``y``.
    """
    n = cuda.grid(1)
    if 0 <= n < y.size:
        d = n + k.shape[0] - 1
        acc = 0.0
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                acc += x[d - i] * x[d - j] * k[i, j]
        y[n] += acc


@cuda.jit
def nbconv_1d1o_device(x, k, y):
    """CUDA kernel: first-order 1-D convolution, one thread per output sample.

    The contribution for sample ``n`` is added onto ``y[n]``, so callers
    pass a zero-filled ``y``.
    """
    n = cuda.grid(1)
    if 0 <= n < y.size:
        d = n + k.shape[0] - 1
        acc = 0.0
        for i in range(k.shape[0]):
            acc += x[d - i] * k[i]
        y[n] += acc


def test_nbc_f(f, x, k):
    """Launch CUDA kernel ``f`` on ``x`` and ``k`` and return the CuPy result.

    A zero-filled CuPy output of length ``x.size - k.shape[0] + 1`` is
    allocated and ``f`` is launched with enough blocks of
    ``_THREADS_PER_BLOCK`` threads to cover it.
    """
    out_len = x.size - k.shape[0] + 1
    y = cp.zeros(out_len)
    # Ceiling division: exactly enough blocks to cover ``y`` (and at least
    # one, so that an empty output still yields a valid launch config).
    blocks = max(1, (y.size + _THREADS_PER_BLOCK - 1) // _THREADS_PER_BLOCK)
    f[blocks, _THREADS_PER_BLOCK](x, k, y)
    return y


def test_1d_1o():
    """Print first-order results from cuSignal, NumPy, njit and the CUDA kernel."""
    a = np.arange(5)
    b = np.arange(2) + 1
    c = cs.convolve(a, b, "valid")
    print("cs-1o", c.size, c)
    c = np.convolve(a, b, "valid")
    print("np-1o", c.size, c)
    c = nbconv_1d1o(a, b)
    print("nj-1o", c.size, c.astype(dtype=int))
    c = test_nbc_f(nbconv_1d1o_device, a, b)
    print("cj-1o", c.size, c.astype(dtype=int))


def test_1d_2o():
    """Print second-order results from cuSignal, njit and the CUDA kernel."""
    a = np.arange(5)
    b = np.arange(4).reshape(2, 2) + 1
    c = cs.convolve1d2o(a, b)  # valid
    print("cs-2o", c.size, c)
    c = nbconv_1d2o(a, b)
    print("nj-2o", c.size, c.astype(dtype=int))
    c = test_nbc_f(nbconv_1d2o_device, a, b)
    print("cj-2o", c.size, c.astype(dtype=int))


def test_1d_3o():
    """Print third-order results from cuSignal, njit and the CUDA kernel."""
    a = np.arange(5)
    b = np.arange(8).reshape(2, 2, 2) + 1
    c = cs.convolve1d3o(a, b)  # valid
    print("cs-3o", c.size, c)
    c = nbconv_1d3o(a, b)
    print("nj-3o", c.size, c.astype(dtype=int))
    c = test_nbc_f(nbconv_1d3o_device, a, b)
    print("cj-3o", c.size, c.astype(dtype=int))


def test_1d_1o2o3o():
    """Run the first-, second- and third-order printouts in sequence."""
    test_1d_1o()
    test_1d_2o()
    test_1d_3o()


def time_n(n, f, a, b):
    """Return the seconds taken by ``n`` calls of ``f(a, b)``.

    The device is synchronized before the clock starts and before it
    stops: GPU-backed callables may return before their kernels finish,
    and pending work must not leak into or out of the measurement.
    """
    cuda.synchronize()
    start = time.perf_counter()
    for _ in range(n):
        f(a, b)
    cuda.synchronize()
    return time.perf_counter() - start


def time_n_cuda(n, f, a, b):
    """Return the seconds taken by ``n`` launches of CUDA kernel ``f``.

    Each launch goes through ``test_nbc_f``, so output allocation is part
    of the measurement. The device is synchronized around the timed
    region because kernel launches are asynchronous.
    """
    cuda.synchronize()
    start = time.perf_counter()
    for _ in range(n):
        test_nbc_f(f, a, b)
    cuda.synchronize()
    return time.perf_counter() - start


def benchmark_1d2o(m, n, d):
    """Time ``m`` second-order convolutions of ``n`` samples with a ``d x d`` kernel."""
    a = np.random.uniform(-1, 1, (n))
    b = np.random.uniform(-1, 1, (d, d))
    t = time_n(m, cs.convolve1d2o, a, b)
    print("cs-1d2o", t)
    t = time_n(m, nbconv_1d2o, a, b)
    print("nj-1d2o", t)
    t = time_n_cuda(m, nbconv_1d2o_device, a, b)
    print("cj-1d2o", t)


def benchmark_1d3o(m, n, d):
    """Time ``m`` third-order convolutions of ``n`` samples with a ``d x d x d`` kernel."""
    a = np.random.uniform(-1, 1, (n))
    b = np.random.uniform(-1, 1, (d, d, d))
    t = time_n(m, cs.convolve1d3o, a, b)
    print("cs-1d3o", t)
    t = time_n(m, nbconv_1d3o, a, b)
    print("nj-1d3o", t)
    t = time_n_cuda(m, nbconv_1d3o_device, a, b)
    print("cj-1d3o", t)


if __name__ == "__main__":
    m, n, d = 50, 200, 50
    # The first pass includes JIT compilation; the second pass shows
    # steady-state timings.
    print("first run")
    benchmark_1d2o(m, n, d)
    benchmark_1d3o(m, n, d)
    print("second run")
    benchmark_1d2o(m, n, d)
    benchmark_1d3o(m, n, d)
    test_1d_1o2o3o()