Last active
October 22, 2020 08:23
-
-
Save randompast/73b23a7d2560305be8bddb3a2b9f3a53 to your computer and use it in GitHub Desktop.
Revisions
-
randompast renamed this gist
Oct 22, 2020 . 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
randompast revised this gist
Oct 22, 2020 . No changes.There are no files selected for viewing
-
randompast renamed this gist
Oct 22, 2020 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
randompast created this gist
Oct 22, 2020 .There are no files selected for viewing
# Benchmark: third-order 1-D convolution ("conv1d3o").
# Compares cusignal.convolve1d3o against a numba @njit CPU loop and two
# hand-written numba CUDA kernels.
#
# Recorded run:
# (cusignal-dev) ~/Desktop/code/cs_fork/cusignal$ python3 discourse_2.py
# [-0.06169884 -0.07237074  0.08575766 -0.03845544  0.37396236]   (x4, all four agree)
# True True True
# 481 -135.8448081001215
# 481 -135.84480810012172
# 481 -135.8448081001215
# 481 -135.84480810012175
# init          0.22148 0.25764 0.15601 0.18100
# size  1k-ops  cusig   njit    cuda_1  cuda_2
#   10     491  0.02558 0.07133 0.12641 0.11735
#   20    3848  0.02641 0.51370 0.34600 0.31686
#   30   12717  0.03262 1.67938 0.88682 0.86864
#   40   29504  0.04509 4.28684 1.99569 2.01400
#   50   56375  0.06790 7.96369 3.92720 3.92942

import time
import warnings

import numpy as np
import cupy as cp
import cusignal as cs
from numba import cuda, njit

warnings.filterwarnings('ignore')


@cuda.jit
def conv_1d3o_cuda_1(x, k, y):
    """Third-order 1-D convolution, one thread per output element.

    y[n] = sum_{i,j,l} x[d-i] * x[d-j] * x[d-l] * k[i,j,l],
    with d = n + k.shape[0] - 1 ('valid'-style output).
    """
    n = cuda.grid(1)
    # cuda.grid(1) is never negative, so only the upper bound needs checking.
    if n < y.size:
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    y[n] += x[d - i] * x[d - j] * x[d - l] * k[i, j, l]


@cuda.jit
def conv_1d3o_cuda_2(x, k, y):
    """Third-order 1-D convolution, grid-stride-loop variant.

    BUG FIX: the original reused ``i`` as both the grid-stride loop
    variable and the innermost kernel index (the inner loop shadowed it),
    and it indexed with ``n`` / accumulated into ``y[n]`` instead of the
    stride-loop element.  Each thread therefore repeatedly processed only
    its own ``n``, and outputs beyond one grid of threads were never
    computed — masked in practice because the launch config makes
    stride >= y.size.  Correct grid-stride form below.
    """
    start = cuda.grid(1)
    stride = cuda.gridsize(1)
    for n in range(start, y.size, stride):
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    # Atomic is not strictly required (each n is owned by
                    # exactly one thread) but kept to match the original's
                    # accumulation style.
                    cuda.atomic.add(
                        y, n, x[d - i] * x[d - j] * x[d - l] * k[i, j, l]
                    )


@njit
def conv_1d3o_njit(x, k, y):
    """CPU reference kernel: accumulate the convolution into preallocated y."""
    for n in range(0, y.size):
        d = n + k.shape[0] - 1
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    y[n] += x[d - i] * x[d - j] * x[d - l] * k[i, j, l]


def conv_1d3o_cj(f, x, k):
    """Copy x and k to the device, launch CUDA kernel ``f``, return device y.

    Launch configuration: 128 threads/block, 20 blocks per SM.
    """
    xc = cuda.to_device(x)
    kc = cuda.to_device(k)
    device_id = cp.cuda.Device()
    num_sm = device_id.attributes["MultiProcessorCount"]
    threads = (128,)
    blocks = (num_sm * 20,)
    # 'valid'-style output length.
    y = cp.zeros(x.size - k.shape[0] + 1)
    f[blocks, threads](xc, kc, y)
    return y


def conv_1d3o_nj(x, k):
    """njit wrapper: allocate the output array and run the CPU kernel."""
    y = np.zeros(x.size - k.shape[0] + 1)
    conv_1d3o_njit(x, k, y)
    return y


def make_xKs(d, xsize):
    """Deterministic random signal x (length xsize) and kernel k of shape (d, d, d)."""
    np.random.seed(0)
    x = np.random.uniform(-1, 1, xsize)
    k = np.random.uniform(-1, 1, (d, d, d))
    return x, k


def test_time_1d_conv(n, f, x, k):
    """Time n calls of f(x, k); return (last result, elapsed seconds)."""
    start = time.time()
    for _ in range(n):
        y = f(x, k)
    elapsed = time.time() - start
    return y, elapsed


def test_time_1d_conv_cuda(n, f, x, k):
    """Time n launches of CUDA kernel f via conv_1d3o_cj; return (result, seconds)."""
    start = time.time()
    for _ in range(n):
        y = conv_1d3o_cj(f, x, k)
    elapsed = time.time() - start
    return y, elapsed


def prime():
    """Warm up (JIT-compile) all four implementations on a tiny input.

    Returns the four first-call times, which are dominated by compilation.
    """
    args = make_xKs(2, 5)
    n = 1
    y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
    y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
    y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
    y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
    return t0, t1, t2, t3


def benchmark(n):
    """Print a timing table for kernel sizes d = 10..50, n repetitions each."""
    print("{:s}\t{:s}\t{:s}\t{:s}\t{:s}\t{:s}".format(
        "size", "1k-ops", "cusig", "njit", "cuda_1", "cuda_2"))
    for d in range(10, 60, 10):
        args = make_xKs(d, 500)
        x, k = args
        # Multiply-accumulate count for one convolution, in thousands.
        ops = k.shape[0] * k.shape[1] * k.shape[2] * (x.size - k.shape[0] + 1) // 1000
        y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
        y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
        y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
        y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
        print("{:4d}\t{:4d}\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(
            d, ops, t0, t1, t2, t3))


def check_simple():
    """Print all four implementations' output on a small case plus agreement flags."""
    # (Removed a dead assignment that built np.arange test data and was
    # immediately overwritten by the line below.)
    args = x, k = make_xKs(4, 8)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    # print(args[0])
    # print(args[1])
    print(y0)
    print(y1)
    print(y2)
    print(y3)
    c1 = np.all(np.isclose(y0, y1))
    c2 = np.all(np.isclose(y0, y2))
    c3 = np.all(np.isclose(y0, y3))
    print(c1, c2, c3)


def check_consistancy():  # (sic) original name kept for compatibility
    """Print size and float sum of each implementation's output on a larger case."""
    args = make_xKs(20, 500)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    print(y0.size, np.sum(y0))
    print(y1.size, np.sum(y1))
    print(y2.size, np.sum(y2))
    print(y3.size, np.sum(y3))


def check_sanity():
    """Warm everything up, run both correctness checks, and print warm-up times."""
    t0, t1, t2, t3 = prime()
    check_simple()
    check_consistancy()
    print("init\t\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(t0, t1, t2, t3))


if __name__ == '__main__':
    check_sanity()
    benchmark(100)