Last active
September 29, 2023 14:00
-
-
Save mdmaas/d1b6b1a69a6b235143d7110237ff4ae8 to your computer and use it in GitHub Desktop.
Revisions
-
mdmaas revised this gist
Sep 17, 2023 . 1 changed file with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,6 +3,8 @@ using Bumper using StrideArraysCore using StaticTools set_default_buffer_size!(1000) @inline function sumArray_alloc(N) smallarray = Array{Float64}(undef,N) @turbo for i ∈ 1:N -
mdmaas revised this gist
Sep 17, 2023 . 1 changed file with 12 additions and 42 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,14 +3,6 @@ using Bumper using StrideArraysCore using StaticTools @inline function sumArray_alloc(N) smallarray = Array{Float64}(undef,N) @turbo for i ∈ 1:N @@ -81,26 +73,6 @@ function test_bumper(N) end end function test_malloc(N) rep = 10000 x = 0.0 @@ -109,19 +81,17 @@ function test_malloc(N) end end function test_prealloc(N) rep = 10000 smallarray = Array{Float64}(undef,N) x = 0.0 for i ∈ 1:rep x = sumArray_prealloc(N,smallarray) end end using Libdl C_code = """ #include <stdlib.h> @@ -185,7 +155,7 @@ end time_sumCArray(N, REP) = @ccall "./libarray.so".timesumCArray(N::Cint, REP::Cint)::Cdouble test_c_timing(N) = time_sumCArray(N, 10000) Ns = 5:2:100 t_cheat = [(@elapsed test_cheat(N))*1e6 for N ∈ Ns] t_alloc = [(@elapsed test_alloc(N))*1e6 for N ∈ Ns] t_malloc = [(@elapsed test_malloc(N))*1e6 for N ∈ Ns] @@ -200,11 +170,11 @@ mean(x) = sum(x) / length(x) using Plots gr() # plot(Ns, t_alloc./t_cstack, label="Julia Arrays") plot(Ns, t_prealloc./t_cstack, label="Julia Pre-allocated Arrays") plot!(Ns, t_malloc./t_cstack, label="Julia MallocArrays") plot!(Ns, t_pre_malloc./t_cstack, label="Julia Pre-MallocArrays") plot!(Ns, t_bumper./t_cstack, label="Bumper+StrideArrays") plot!(Ns, t_cheap./t_cstack, label="C Heap Array") plot!(xlabel="N", ylabel="Cost wrt C stack-allocation") plot!(Ns, ones(size(Ns)), color=:black, label="C Stack Arrays") -
mdmaas revised this gist
Sep 17, 2023 . 1 changed file with 54 additions and 19 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,9 +1,17 @@ using LoopVectorization using Bumper using StrideArraysCore using StaticTools function sum_cheat(N) sum = 0.0 @turbo for i ∈ 1:N sum += 1.0 / i^2 end return sum end @inline function sumArray_alloc(N) smallarray = Array{Float64}(undef,N) @turbo for i ∈ 1:N smallarray[i] = 1.0 / i^2 @@ -15,7 +23,7 @@ function sumArray_alloc(N) return sum end @inline function sumArray_malloc(N) smallarray = MallocArray{Float64}(undef, N) @turbo for i ∈ 1:N smallarray[i] = 1.0 / i^2 @@ -28,7 +36,7 @@ function sumArray_malloc(N) return sum end @inline function sumArray_bumper(N) @no_escape begin smallarray = alloc(Float64, N) @turbo for i ∈ 1:N @@ -44,7 +52,7 @@ end @inline function sumArray_prealloc(N, smallarray) @turbo for i ∈ 1:N smallarray[i] = 1.0 / i^2 end @@ -55,6 +63,8 @@ function sumArray_prealloc(N, smallarray) return sum end function test_alloc(N) rep = 10000 x = 0.0 @@ -63,30 +73,52 @@ function test_alloc(N) end end function test_bumper(N) rep = 10000 x = 0.0 for i ∈ 1:rep x = sumArray_bumper(N) end end function test_prealloc(N) rep = 10000 smallarray = Array{Float64}(undef,N) x = 0.0 for i ∈ 1:rep x = sumArray_prealloc(N,smallarray) end end function test_cheat(N) rep = 10000 x = 0.0 for i ∈ 1:rep x = sum_cheat(N) end end function test_malloc(N) rep = 10000 x = 0.0 for i ∈ 1:rep x = sumArray_malloc(N) end end function test_pre_malloc(N) rep = 10000 smallarray = MallocArray{Float64}(undef,N) x = 0.0 for i ∈ 1:rep x = sumArray_prealloc(N,smallarray) end free(smallarray) end @@ -153,11 +185,13 @@ end time_sumCArray(N, REP) = @ccall "./libarray.so".timesumCArray(N::Cint, REP::Cint)::Cdouble test_c_timing(N) = time_sumCArray(N, 10000) Ns = [3^p for p 
in 2:10] t_cheat = [(@elapsed test_cheat(N))*1e6 for N ∈ Ns] t_alloc = [(@elapsed test_alloc(N))*1e6 for N ∈ Ns] t_malloc = [(@elapsed test_malloc(N))*1e6 for N ∈ Ns] t_bumper = [(@elapsed test_bumper(N))*1e6 for N ∈ Ns] t_prealloc = [(@elapsed test_prealloc(N))*1e6 for N ∈ Ns] t_pre_malloc = [(@elapsed test_pre_malloc(N))*1e6 for N ∈ Ns] t_cstack = [(@elapsed test_cstackarray(N))*1e6 for N ∈ Ns] t_cheap = [(@elapsed test_cheaparray(N))*1e6 for N ∈ Ns] @@ -166,10 +200,11 @@ mean(x) = sum(x) / length(x) using Plots gr() scatter(log10.(Ns), t_alloc./t_cstack, label="Julia Arrays") scatter!(log10.(Ns), t_prealloc./t_cstack, label="Julia Pre-allocated Arrays") scatter!(log10.(Ns), t_malloc./t_cstack, label="Julia MallocArrays") scatter!(log10.(Ns), t_pre_malloc./t_cstack, label="Julia Pre-MallocArrays") scatter!(log10.(Ns), t_bumper./t_cstack, label="Bumper+StrideArrays") scatter!(log10.(Ns), t_cheap./t_cstack, label="C Heap Array") plot!(xlabel="log10(N)", ylabel="Cost wrt C stack-allocation") plot!(log10.(Ns), ones(size(Ns)), color=:black, label="C Stack Arrays") -
mdmaas created this gist
Sep 15, 2023 .There are no files selected for viewing
# Benchmark: cost of filling and summing a small temporary array of length N using
# several allocation strategies in Julia, compared against C stack and C heap arrays.
# Every timing is reported in microseconds for a fixed number of repetitions and is
# plotted relative to the C stack-array timing.

using LoopVectorization
using Bumper
using StrideArrays
using StaticTools
using Libdl
using Plots

"""
    sumArray_alloc(N)

Fill a freshly allocated `Array{Float64}` of length `N` with `1/i^2` for `i ∈ 1:N`
and return the sum of its entries.
"""
function sumArray_alloc(N)
    smallarray = Array{Float64}(undef, N)
    @turbo for i ∈ 1:N
        smallarray[i] = 1.0 / i^2
    end
    total = 0.0
    @turbo for i ∈ 1:N
        total += smallarray[i]
    end
    return total
end

"""
    sumArray_malloc(N)

Same computation as `sumArray_alloc`, but the temporary is a `MallocArray{Float64}`
that is explicitly released with `free` before returning.
"""
function sumArray_malloc(N)
    smallarray = MallocArray{Float64}(undef, N)
    @turbo for i ∈ 1:N
        smallarray[i] = 1.0 / i^2
    end
    total = 0.0
    @turbo for i ∈ 1:N
        total += smallarray[i]
    end
    free(smallarray)
    return total
end

"""
    sumArray_bumper(N)

Same computation as `sumArray_alloc`, but the temporary is obtained with Bumper's
`alloc` inside a `@no_escape` block. Only the scalar sum leaves the block.
"""
function sumArray_bumper(N)
    @no_escape begin
        smallarray = alloc(Float64, N)
        @turbo for i ∈ 1:N
            smallarray[i] = 1.0 / i^2
        end
        total = 0.0
        @turbo for i ∈ 1:N
            total += smallarray[i]
        end
    end
    return total
end

"""
    sumArray_prealloc(N, smallarray)

Same computation as `sumArray_alloc`, writing into the caller-supplied `smallarray`
instead of allocating. The first `N` entries of `smallarray` are overwritten.

The caller must pass a buffer with at least `N` elements: the loops run under `@turbo`
and no length check is performed here.
"""
function sumArray_prealloc(N, smallarray)
    @turbo for i ∈ 1:N
        smallarray[i] = 1.0 / i^2
    end
    total = 0.0
    @turbo for i ∈ 1:N
        total += smallarray[i]
    end
    return total
end

"""
    test_alloc(N, rep=10000)

Call `sumArray_alloc(N)` `rep` times and return the last result. Returning the value
keeps the computed sum observable instead of discarding it.
"""
function test_alloc(N, rep=10000)
    x = 0.0
    for _ ∈ 1:rep
        x = sumArray_alloc(N)
    end
    return x
end

"""
    test_malloc(N, rep=10000)

Call `sumArray_malloc(N)` `rep` times and return the last result.
"""
function test_malloc(N, rep=10000)
    x = 0.0
    for _ ∈ 1:rep
        x = sumArray_malloc(N)
    end
    return x
end

"""
    test_bumper(N, rep=10000)

Call `sumArray_bumper(N)` `rep` times and return the last result.
"""
function test_bumper(N, rep=10000)
    x = 0.0
    for _ ∈ 1:rep
        x = sumArray_bumper(N)
    end
    return x
end

"""
    test_prealloc(N, rep=10000)

Allocate one `Array{Float64}` of length `N`, then call `sumArray_prealloc` on it
`rep` times and return the last result.
"""
function test_prealloc(N, rep=10000)
    smallarray = Array{Float64}(undef, N)
    x = 0.0
    for _ ∈ 1:rep
        x = sumArray_prealloc(N, smallarray)
    end
    return x
end

# C reference implementations of the same computation, compiled below with gcc.
C_code = """
#include <stdlib.h>
#include <math.h>
#include <omp.h>

double sumCStackArray( int N )
{
    double smallarray[N];
    for(unsigned int k = 0; k<N; k++){
        smallarray[k] = 1.0/pow(k+1,2);
    }
    double sum = 0.0;
    #pragma omp simd reduction(+:sum)
    for(unsigned int k = 0; k<N; k++){
        sum += smallarray[k];
    }
    return sum;
}

double sumCHeapArray( int N )
{
    double * smallarray = malloc(N * sizeof(double));
    for(unsigned int k = 0; k<N; k++){
        smallarray[k] = 1.0/pow(k+1,2);
    }
    double sum = 0.0;
    #pragma omp simd reduction(+:sum)
    for(unsigned int k = 0; k<N; k++){
        sum += smallarray[k];
    }
    free(smallarray);
    return sum;
}
"""

Clib = "libarray"

# Single source of truth for the shared-library location: the same path is used to
# build and to load the library, so the platform extension (`Libdl.dlext`) always
# matches and the wrappers keep working if the working directory changes later.
const LIBARRAY_PATH = abspath(Clib * "." * Libdl.dlext)

# Feed the C source to gcc on stdin; `open(f, cmd, "w")` waits for gcc to finish.
open(`gcc -fPIC -O3 -fargument-noalias -fopenmp -xc -shared -o $LIBARRAY_PATH -`, "w") do f
    print(f, C_code)
end

"Call the C function `sumCStackArray` from the compiled library."
sumCStackArray(N) = @ccall LIBARRAY_PATH.sumCStackArray(N::Cint)::Cdouble

"Call the C function `sumCHeapArray` from the compiled library."
sumCHeapArray(N) = @ccall LIBARRAY_PATH.sumCHeapArray(N::Cint)::Cdouble

"""
    test_cstackarray(N, rep=10000)

Call `sumCStackArray(N)` `rep` times and return the last result.
"""
function test_cstackarray(N, rep=10000)
    x = 0.0
    for _ ∈ 1:rep
        x = sumCStackArray(N)
    end
    return x
end

"""
    test_cheaparray(N, rep=10000)

Call `sumCHeapArray(N)` `rep` times and return the last result.
"""
function test_cheaparray(N, rep=10000)
    x = 0.0
    for _ ∈ 1:rep
        x = sumCHeapArray(N)
    end
    return x
end

# NOTE(review): `timesumCArray` is not defined in `C_code` above, so calling either of
# these two functions fails at symbol lookup. They are kept because other code may
# refer to them; neither is used by the benchmark below.
time_sumCArray(N, REP) = @ccall LIBARRAY_PATH.timesumCArray(N::Cint, REP::Cint)::Cdouble
test_c_timing(N) = time_sumCArray(N, 10000)

"""
    elapsed_us(f, Ns)

Return the wall-clock time of `f(N)` in microseconds for every `N` in `Ns`.

`f` is called once on `first(Ns)` before any measurement so that one-time costs (JIT
compilation, loading the shared library) are not charged to the first data point.
"""
function elapsed_us(f, Ns)
    f(first(Ns))
    return [(@elapsed f(N)) * 1e6 for N ∈ Ns]
end

Ns = [2^p for p in 2:14]
t_alloc = elapsed_us(test_alloc, Ns)
t_malloc = elapsed_us(test_malloc, Ns)
t_bumper = elapsed_us(test_bumper, Ns)
t_prealloc = elapsed_us(test_prealloc, Ns)
t_cstack = elapsed_us(test_cstackarray, Ns)
t_cheap = elapsed_us(test_cheaparray, Ns)

mean(x) = sum(x) / length(x)

# Plot every strategy's cost relative to the C stack-array baseline (the black line at 1).
gr()
scatter(log2.(Ns), t_alloc./t_cstack, label="Julia Arrays")
scatter!(log2.(Ns), t_prealloc./t_cstack, label="Julia Pre-allocated Arrays")
scatter!(log2.(Ns), t_malloc./t_cstack, label="Julia MallocArrays")
scatter!(log2.(Ns), t_bumper./t_cstack, label="Bumper+StrideArrays")
scatter!(log2.(Ns), t_cheap./t_cstack, label="C Heap Array")
plot!(xlabel="log2(N)", ylabel="Cost wrt C stack-allocation")
plot!(log2.(Ns), ones(size(Ns)), color=:black, label="C Stack Arrays")