Last active
October 22, 2020 08:23
-
-
Save randompast/73b23a7d2560305be8bddb3a2b9f3a53 to your computer and use it in GitHub Desktop.
Revisions
-
randompast renamed this gist
Oct 22, 2020 . 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
randompast revised this gist
Oct 22, 2020 . No changes.There are no files selected for viewing
-
randompast renamed this gist
Oct 22, 2020 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
randompast created this gist
Oct 22, 2020 .There are no files selected for viewing
# Benchmark: third-order 1-D convolution ("conv1d3o").
# Compares cusignal.convolve1d3o against a numba @njit CPU loop and two
# hand-written numba CUDA kernels.
#
# Recorded run:
# (cusignal-dev) ~/Desktop/code/cs_fork/cusignal$ python3 discourse_2.py
# [-0.06169884 -0.07237074  0.08575766 -0.03845544  0.37396236]   (x4, all four agree)
# True True True
# 481 -135.8448081001215
# 481 -135.84480810012172
# 481 -135.8448081001215
# 481 -135.84480810012175
# init          0.22148 0.25764 0.15601 0.18100
# size  1k-ops  cusig   njit    cuda_1  cuda_2
#   10     491  0.02558 0.07133 0.12641 0.11735
#   20    3848  0.02641 0.51370 0.34600 0.31686
#   30   12717  0.03262 1.67938 0.88682 0.86864
#   40   29504  0.04509 4.28684 1.99569 2.01400
#   50   56375  0.06790 7.96369 3.92720 3.92942

import time
import warnings

import numpy as np
import cupy as cp
import cusignal as cs
from numba import cuda, njit

warnings.filterwarnings('ignore')


@cuda.jit
def conv_1d3o_cuda_1(x, k, y):
    """Third-order 1-D convolution, one thread per output element.

    y[n] = sum_{i,j,l} x[d-i] * x[d-j] * x[d-l] * k[i,j,l],
    with d = n + k.shape[0] - 1 ('valid'-style output).
    """
    n = cuda.grid(1)
    # cuda.grid(1) is never negative, so only the upper bound needs checking.
    if n < y.size:
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    y[n] += x[d - i] * x[d - j] * x[d - l] * k[i, j, l]


@cuda.jit
def conv_1d3o_cuda_2(x, k, y):
    """Third-order 1-D convolution, grid-stride-loop variant.

    BUG FIX: the original reused ``i`` as both the grid-stride loop
    variable and the innermost kernel index (the inner loop shadowed it),
    and it indexed with ``n`` / accumulated into ``y[n]`` instead of the
    stride-loop element.  Each thread therefore repeatedly processed only
    its own ``n``, and outputs beyond one grid of threads were never
    computed — masked in practice because the launch config makes
    stride >= y.size.  Correct grid-stride form below.
    """
    start = cuda.grid(1)
    stride = cuda.gridsize(1)
    for n in range(start, y.size, stride):
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    # Atomic is not strictly required (each n is owned by
                    # exactly one thread) but kept to match the original's
                    # accumulation style.
                    cuda.atomic.add(
                        y, n, x[d - i] * x[d - j] * x[d - l] * k[i, j, l]
                    )


@njit
def conv_1d3o_njit(x, k, y):
    """CPU reference kernel: accumulate the convolution into preallocated y."""
    for n in range(0, y.size):
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    y[n] += x[d - i] * x[d - j] * x[d - l] * k[i, j, l]


def conv_1d3o_cj(f, x, k):
    """Copy x and k to the device, launch CUDA kernel ``f``, return device y.

    Launch configuration: 128 threads/block, 20 blocks per SM.
    """
    xc = cuda.to_device(x)
    kc = cuda.to_device(k)
    device_id = cp.cuda.Device()
    num_sm = device_id.attributes["MultiProcessorCount"]
    threads = (128,)
    blocks = (num_sm * 20,)
    # 'valid'-style output length.
    y = cp.zeros(x.size - k.shape[0] + 1)
    f[blocks, threads](xc, kc, y)
    return y


def conv_1d3o_nj(x, k):
    """njit wrapper: allocate the output array and run the CPU kernel."""
    y = np.zeros(x.size - k.shape[0] + 1)
    conv_1d3o_njit(x, k, y)
    return y


def make_xKs(d, xsize):
    """Deterministic random signal x (length xsize) and kernel k of shape (d, d, d)."""
    np.random.seed(0)
    x = np.random.uniform(-1, 1, xsize)
    k = np.random.uniform(-1, 1, (d, d, d))
    return x, k


def test_time_1d_conv(n, f, x, k):
    """Time n calls of f(x, k); return (last result, elapsed seconds)."""
    start = time.time()
    for _ in range(n):
        y = f(x, k)
    elapsed = time.time() - start
    return y, elapsed


def test_time_1d_conv_cuda(n, f, x, k):
    """Time n launches of CUDA kernel f via conv_1d3o_cj; return (result, seconds)."""
    start = time.time()
    for _ in range(n):
        y = conv_1d3o_cj(f, x, k)
    elapsed = time.time() - start
    return y, elapsed


def prime():
    """Warm up (JIT-compile) all four implementations on a tiny input.

    Returns the four first-call times, which are dominated by compilation.
    """
    args = make_xKs(2, 5)
    n = 1
    y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
    y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
    y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
    y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
    return t0, t1, t2, t3


def benchmark(n):
    """Print a timing table for kernel sizes d = 10..50, n repetitions each."""
    print("{:s}\t{:s}\t{:s}\t{:s}\t{:s}\t{:s}".format(
        "size", "1k-ops", "cusig", "njit", "cuda_1", "cuda_2"))
    for d in range(10, 60, 10):
        args = make_xKs(d, 500)
        x, k = args
        # Multiply-accumulate count for one convolution, in thousands.
        ops = k.shape[0] * k.shape[1] * k.shape[2] * (x.size - k.shape[0] + 1) // 1000
        y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
        y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
        y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
        y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
        print("{:4d}\t{:4d}\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(
            d, ops, t0, t1, t2, t3))


def check_simple():
    """Print all four implementations' output on a small case plus agreement flags."""
    # (Removed a dead assignment that built np.arange test data and was
    # immediately overwritten by the line below.)
    args = x, k = make_xKs(4, 8)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    # print(args[0])
    # print(args[1])
    print(y0)
    print(y1)
    print(y2)
    print(y3)
    c1 = np.all(np.isclose(y0, y1))
    c2 = np.all(np.isclose(y0, y2))
    c3 = np.all(np.isclose(y0, y3))
    print(c1, c2, c3)


def check_consistancy():  # (sic) original name kept for compatibility
    """Print size and float sum of each implementation's output on a larger case."""
    args = make_xKs(20, 500)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    print(y0.size, np.sum(y0))
    print(y1.size, np.sum(y1))
    print(y2.size, np.sum(y2))
    print(y3.size, np.sum(y3))


def check_sanity():
    """Warm everything up, run both correctness checks, and print warm-up times."""
    t0, t1, t2, t3 = prime()
    check_simple()
    check_consistancy()
    print("init\t\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(t0, t1, t2, t3))


if __name__ == '__main__':
    check_sanity()
    benchmark(100)