Created
November 23, 2023 16:14
-
-
Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.
Revisions
-
MasonProtter created this gist
Nov 23, 2023. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,212 @@ julia> code_native(Tuple{Vector{Int}}) do v sum(v; init=0) end .text .file "#58" .globl "julia_#58_2730" # -- Begin function julia_#58_2730 .p2align 4, 0x90 .type "julia_#58_2730",@function "julia_#58_2730": # @"julia_#58_2730" ; ┌ @ REPL[24]:2 within `#58` # %bb.0: # %top push rbp ; │┌ @ reducedim.jl:996 within `sum` ; ││┌ @ reducedim.jl:996 within `#sum#828` ; │││┌ @ reducedim.jl:1000 within `_sum` ; ││││┌ @ reducedim.jl:1000 within `#_sum#830` ; │││││┌ @ reducedim.jl:1001 within `_sum` ; ││││││┌ @ reducedim.jl:1001 within `#_sum#831` ; │││││││┌ @ reducedim.jl:357 within `mapreduce` ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821` ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim` ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl` ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl` ; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` @ array.jl:943 ; ││││││││││││││┌ @ essentials.jl:10 within `length` mov rcx, qword ptr [rdi + 8] mov rbp, rsp ; ││││││││││││││└ ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 test rcx, rcx ; ││││││││││││││└ je .LBB0_1 # %bb.2: # %L19 ; ││││││││││││││┌ @ essentials.jl:13 within `getindex` mov r9, qword ptr [rdi] mov rax, qword ptr [r9] ; │││││││││││││└└ ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 cmp rcx, 1 ; ││││││││││││││└ je .LBB0_15 # %bb.3: # %L40.preheader lea r8, [rcx - 1] cmp r8, 16 jae .LBB0_5 # %bb.4: mov esi, 2 mov edx, 1 jmp .LBB0_13 .LBB0_1: xor eax, eax ; │└└└└└└└└└└└└└ pop rbp ret .LBB0_5: # %vector.ph ; │┌ @ 
reducedim.jl:996 within `sum` ; ││┌ @ reducedim.jl:996 within `#sum#828` ; │││┌ @ reducedim.jl:1000 within `_sum` ; ││││┌ @ reducedim.jl:1000 within `#_sum#830` ; │││││┌ @ reducedim.jl:1001 within `_sum` ; ││││││┌ @ reducedim.jl:1001 within `#_sum#831` ; │││││││┌ @ reducedim.jl:357 within `mapreduce` ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821` ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim` ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl` ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl` ; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` mov rsi, r8 and rsi, -16 vmovq xmm0, rax lea rdx, [rsi - 16] mov rax, rdx shr rax, 4 inc rax mov r10d, eax and r10d, 7 cmp rdx, 112 jae .LBB0_7 # %bb.6: vpxor xmm1, xmm1, xmm1 xor edi, edi vpxor xmm2, xmm2, xmm2 vpxor xmm3, xmm3, xmm3 jmp .LBB0_9 .LBB0_7: # %vector.ph.new and rax, -8 lea rdx, [r9 + 1000] vpxor xmm1, xmm1, xmm1 xor edi, edi vpxor xmm2, xmm2, xmm2 vpxor xmm3, xmm3, xmm3 .p2align 4, 0x90 .LBB0_8: # %vector.body # =>This Inner Loop Header: Depth=1 ; │││││││││││││└ ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` ; │││││││││││││││┌ @ int.jl:87 within `+` vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 992] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 960] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 928] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 896] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 864] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 832] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 800] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 768] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 736] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 704] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 672] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 640] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 608] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi 
- 576] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 544] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 512] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 480] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 448] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 416] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 384] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 352] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 320] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 288] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 256] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 224] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 192] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 160] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 128] vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 96] vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 64] vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 32] vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi] sub rdi, -128 add rax, -8 jne .LBB0_8 .LBB0_9: # %middle.block.unr-lcssa test r10, r10 je .LBB0_11 .p2align 4, 0x90 .LBB0_10: # %vector.body.epil # =>This Inner Loop Header: Depth=1 ; │││││││││││││└└└ ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` lea rax, [8*rdi] add rdi, 16 ; ││││││││││││││┌ @ essentials.jl:13 within `getindex` or rax, 8 dec r10 ; │││││││││││││└└ ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` ; │││││││││││││││┌ @ int.jl:87 within `+` vpaddq ymm0, ymm0, ymmword ptr [r9 + rax] vpaddq ymm1, ymm1, ymmword ptr [r9 + rax + 32] vpaddq ymm2, ymm2, ymmword ptr [r9 + rax + 64] vpaddq ymm3, ymm3, ymmword ptr [r9 + rax + 96] jne .LBB0_10 .LBB0_11: # %middle.block ; │││││││││││││└└└ ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` vpaddq ymm1, ymm1, ymm3 vpaddq ymm0, ymm0, ymm2 vpaddq ymm0, ymm0, ymm1 vextracti128 xmm1, ymm0, 1 vpaddq xmm0, xmm0, xmm1 
vpshufd xmm1, xmm0, 238 # xmm1 = xmm0[2,3,2,3] vpaddq xmm0, xmm0, xmm1 vmovq rax, xmm0 cmp r8, rsi je .LBB0_15 # %bb.12: lea rdx, [rsi + 1] or rsi, 2 .LBB0_13: # %scalar.ph sub rcx, rsi inc rcx .p2align 4, 0x90 .LBB0_14: # %L40 # =>This Inner Loop Header: Depth=1 ; │││││││││││││└ ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` ; │││││││││││││││┌ @ int.jl:87 within `+` add rax, qword ptr [r9 + 8*rdx] mov rdx, rsi ; │││││││││││││└└└ ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` ; │││││││││││││┌ @ array.jl:943 within `iterate` ; ││││││││││││││┌ @ int.jl:87 within `+` inc rsi ; ││││││││││││││└ ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 dec rcx ; ││││││││││││││└ jne .LBB0_14 .LBB0_15: # %L55 ; │└└└└└└└└└└└└└ pop rbp vzeroupper ret .Lfunc_end0: .size "julia_#58_2730", .Lfunc_end0-"julia_#58_2730" ; └ # -- End function .section ".note.GNU-stack","",@progbits