Skip to content

Instantly share code, notes, and snippets.

@randompast
Last active October 22, 2020 08:23
Show Gist options
  • Select an option

  • Save randompast/73b23a7d2560305be8bddb3a2b9f3a53 to your computer and use it in GitHub Desktop.

Select an option

Save randompast/73b23a7d2560305be8bddb3a2b9f3a53 to your computer and use it in GitHub Desktop.

Revisions

  1. randompast renamed this gist Oct 22, 2020. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. randompast revised this gist Oct 22, 2020. No changes.
  3. randompast renamed this gist Oct 22, 2020. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. randompast created this gist Oct 22, 2020.
    156 changes: 156 additions & 0 deletions cusignal1d3o
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,156 @@
    # (cusignal-dev) ~/Desktop/code/cs_fork/cusignal$ python3 discourse_2.py
    # [-0.06169884 -0.07237074 0.08575766 -0.03845544 0.37396236]
    # [-0.06169884 -0.07237074 0.08575766 -0.03845544 0.37396236]
    # [-0.06169884 -0.07237074 0.08575766 -0.03845544 0.37396236]
    # [-0.06169884 -0.07237074 0.08575766 -0.03845544 0.37396236]
    # True True True
    # 481 -135.8448081001215
    # 481 -135.84480810012172
    # 481 -135.8448081001215
    # 481 -135.84480810012175
    # init 0.22148 0.25764 0.15601 0.18100
    # size 1k-ops cusig njit cuda_1 cuda_2
    # 10 491 0.02558 0.07133 0.12641 0.11735
    # 20 3848 0.02641 0.51370 0.34600 0.31686
    # 30 12717 0.03262 1.67938 0.88682 0.86864
    # 40 29504 0.04509 4.28684 1.99569 2.01400
    # 50 56375 0.06790 7.96369 3.92720 3.92942


    import time
    from numba import cuda, njit, jit
    import numpy as np
    import cupy as cp
    import cusignal as cs

    import warnings
    warnings.filterwarnings('ignore')

    @cuda.jit
    def conv_1d3o_cuda_1(x,k,y): #convolution 1 dimension 3rd order
    n = cuda.grid(1)
    if (0 <= n) and (n < y.size):
    d = n+k.shape[0]-1
    for i in range(k.shape[0]):
    for j in range(k.shape[1]):
    for l in range(k.shape[2]):
    y[n] += x[d-i] * x[d-j] * x[d-l] * k[i,j,l]

    @cuda.jit
    def conv_1d3o_cuda_2(x,k,y): #convolution 1 dimension 3rd order
    n = cuda.grid(1)
    stride = cuda.gridsize(1)
    for i in range(n, y.size, stride):
    d = n+k.shape[0]-1
    for i in range(k.shape[0]):
    for j in range(k.shape[1]):
    for l in range(k.shape[2]):
    cuda.atomic.add( y, n, x[d-i] * x[d-j] * x[d-l] * k[i,j,l] )

    @njit
    def conv_1d3o_njit(x,k,y): #convolution 1 dimension 3rd order
    for n in range(0, y.size):
    d = n+k.shape[0]-1
    for i in range(k.shape[0]):
    for j in range(k.shape[1]):
    for l in range(k.shape[2]):
    y[n] += x[d-i] * x[d-j] * x[d-l] * k[i,j,l]


    def conv_1d3o_cj(f,x,k):
    xc = cuda.to_device(x)
    kc = cuda.to_device(k)
    device_id = cp.cuda.Device()
    numSM = device_id.attributes["MultiProcessorCount"]
    th = (128, )
    b = (numSM * 20,)
    # y = cuda.device_array_like(x)
    y = cp.zeros(x.size - k.shape[0] + 1)
    f[b,th](xc, kc, y)
    # return y.copy_to_host()[:-k.shape[0]+1]
    return y

    def conv_1d3o_nj(x, k):
    y = np.zeros(x.size-k.shape[0]+1)
    conv_1d3o_njit(x,k,y)
    return y

    def make_xKs(d, xsize):
    np.random.seed(0)
    x = np.random.uniform(-1,1,xsize)
    k = np.random.uniform(-1,1,(d,d,d))
    return x,k

    def test_time_1d_conv(n,f,x,k):
    start = time.time()
    for i in range(n):
    y = f(x,k)
    elapsed = time.time() - start
    return y, elapsed

    def test_time_1d_conv_cuda(n,f,x,k):
    start = time.time()
    for i in range(n):
    y = conv_1d3o_cj(f,x,k)
    elapsed = time.time() - start
    return y, elapsed

    def prime():
    args = make_xKs(2, 5)
    n = 1
    y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
    y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
    y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
    y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
    return t0, t1, t2, t3

    def benchmark(n):
    print("{:s}\t{:s}\t{:s}\t{:s}\t{:s}\t{:s}".format("size", "1k-ops", "cusig", "njit", "cuda_1", "cuda_2"))
    for d in range(10,60,10):
    args = make_xKs(d, 500)
    x, k = args
    ops = k.shape[0] * k.shape[1] * k.shape[2] * (x.size - k.shape[0] + 1) // 1000
    y0, t0 = test_time_1d_conv(n, cs.convolve1d3o, *args)
    y1, t1 = test_time_1d_conv(n, conv_1d3o_nj, *args)
    y2, t2 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_1, *args)
    y3, t3 = test_time_1d_conv_cuda(n, conv_1d3o_cuda_2, *args)
    print("{:4d}\t{:4d}\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(d, ops, t0, t1, t2, t3))

    def check_simple():
    args = x, k = np.arange(5), np.arange(8).reshape(2,2,2)
    args = x, k = make_xKs(4, 8)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    # print(args[0])
    # print(args[1])
    print(y0)
    print(y1)
    print(y2)
    print(y3)
    c1 = np.all( np.isclose(y0, y1) )
    c2 = np.all( np.isclose(y0, y2) )
    c3 = np.all( np.isclose(y0, y3) )
    print(c1, c2, c3)

    def check_consistancy():
    args = make_xKs(20, 500)
    y0 = cs.convolve1d3o(*args)
    y1 = conv_1d3o_nj(*args)
    y2 = conv_1d3o_cj(conv_1d3o_cuda_1, *args)
    y3 = conv_1d3o_cj(conv_1d3o_cuda_2, *args)
    print(y0.size, np.sum(y0))
    print(y1.size, np.sum(y1))
    print(y2.size, np.sum(y2))
    print(y3.size, np.sum(y3))

    def check_sanity():
    t0, t1 ,t2, t3 = prime()
    check_simple()
    check_consistancy()
    print("init\t\t{:0.5f}\t{:0.5f}\t{:0.5f}\t{:0.5f}".format(t0, t1, t2, t3))

# Script entry point: run the sanity/consistency checks once, then the
# timing sweep with 100 repetitions per configuration.
if __name__ == '__main__':
    check_sanity()
    benchmark(100)