# REPL transcript: native x86-64 code (Intel syntax) that Julia emits for the
# anonymous function `v -> sum(v; init=0)` specialised on `Vector{Int}`.
# Lines beginning with `;` are the compiler's own inlining/debug annotations;
# lines beginning with `#` that are not `%bb`/`%label` markers are review notes.
#
# Overall shape of the generated code:
#   1. empty vector            -> return 0
#   2. first element           -> seeds the running sum in rax
#   3. >= 16 remaining elems   -> vector loop (4 ymm accumulators, unrolled 8x),
#                                 then an epilogue for leftover 16-element chunks,
#                                 then a horizontal reduction back into rax
#   4. whatever is left        -> scalar loop, one element per iteration
julia> code_native(Tuple{Vector{Int}}) do v
           sum(v; init=0)
       end
        .text
        .file   "#58"
        .globl  "julia_#58_2730"                # -- Begin function julia_#58_2730
        .p2align        4, 0x90
        .type   "julia_#58_2730",@function
"julia_#58_2730":                       # @"julia_#58_2730"
; ┌ @ REPL[24]:2 within `#58`
# %bb.0:                                # %top
        push    rbp
; │┌ @ reducedim.jl:996 within `sum`
; ││┌ @ reducedim.jl:996 within `#sum#828`
; │││┌ @ reducedim.jl:1000 within `_sum`
; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
; │││││┌ @ reducedim.jl:1001 within `_sum`
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
; │││││││┌ @ reducedim.jl:357 within `mapreduce`
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate` @ array.jl:943
; ││││││││││││││┌ @ essentials.jl:10 within `length`
        # rcx = length(v), read from offset 8 of the object rdi points at.
        # (rdi is presumably the incoming array argument -- confirm against the ABI.)
        mov     rcx, qword ptr [rdi + 8]
        mov     rbp, rsp
; ││││││││││││││└
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
        # Empty vector: branch to the path that returns 0.
        test    rcx, rcx
; ││││││││││││││└
        je      .LBB0_1
# %bb.2:                                # %L19
; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
        # r9 = pointer used as the base for every element load below;
        # rax = first element, which becomes the running sum.
        mov     r9, qword ptr [rdi]
        mov     rax, qword ptr [r9]
; │││││││││││││└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
        # Exactly one element: the sum is already in rax.
        cmp     rcx, 1
; ││││││││││││││└
        je      .LBB0_15
# %bb.3:                                # %L40.preheader
        # r8 = number of elements still to add (length - 1).
        # 16 or more -> vector path; otherwise fall through to the scalar setup.
        lea     r8, [rcx - 1]
        cmp     r8, 16
        jae     .LBB0_5
# %bb.4:
        # Scalar-only setup: rdx = 0-based index of the next element (1),
        # rsi = the index after that (2).
        mov     esi, 2
        mov     edx, 1
        jmp     .LBB0_13
.LBB0_1:
        # Empty-input exit: result is 0. No ymm register has been touched on
        # this path, which is why there is no vzeroupper here.
        xor     eax, eax
; │└└└└└└└└└└└└└
        pop     rbp
        ret
.LBB0_5:                                # %vector.ph
; │┌ @ reducedim.jl:996 within `sum`
; ││┌ @ reducedim.jl:996 within `#sum#828`
; │││┌ @ reducedim.jl:1000 within `_sum`
; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
; │││││┌ @ reducedim.jl:1001 within `_sum`
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
; │││││││┌ @ reducedim.jl:357 within `mapreduce`
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
        # Vector preheader.
        #   rsi  = r8 rounded down to a multiple of 16 (elements the vector code handles)
        #   xmm0 = running sum moved into the low lane of accumulator 0
        #   rax  = ((rsi - 16) >> 4) + 1 = number of 16-element chunks
        #   r10  = rax & 7 = chunks left over after the 8x-unrolled loop
        mov     rsi, r8
        and     rsi, -16
        vmovq   xmm0, rax
        lea     rdx, [rsi - 16]
        mov     rax, rdx
        shr     rax, 4
        inc     rax
        mov     r10d, eax
        and     r10d, 7
        # rsi - 16 >= 112 means at least 8 chunks, so the unrolled loop can run.
        cmp     rdx, 112
        jae     .LBB0_7
# %bb.6:
        # Fewer than 8 chunks: clear accumulators 1-3 and the element offset
        # (rdi is reused as that offset from here on), then go to the epilogue.
        vpxor   xmm1, xmm1, xmm1
        xor     edi, edi
        vpxor   xmm2, xmm2, xmm2
        vpxor   xmm3, xmm3, xmm3
        jmp     .LBB0_9
.LBB0_7:                                # %vector.ph.new
        # rax = chunk count rounded down to a multiple of 8 (loop counter).
        # rdx = r9 + 1000, a biased base so the loop's displacements run from
        # -992 up to 0; [rdx + 8*rdi - 992] is r9 + 8 + 8*rdi, i.e. the loads
        # start at the second element.
        and     rax, -8
        lea     rdx, [r9 + 1000]
        vpxor   xmm1, xmm1, xmm1
        xor     edi, edi
        vpxor   xmm2, xmm2, xmm2
        vpxor   xmm3, xmm3, xmm3
        .p2align        4, 0x90
.LBB0_8:                                # %vector.body
                                        # =>This Inner Loop Header: Depth=1
; │││││││││││││└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
        # Main vector loop: 32 loads of 32 bytes each (128 Int64 per iteration),
        # distributed round-robin over the four accumulators ymm0..ymm3.
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 992]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 960]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 928]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 896]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 864]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 832]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 800]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 768]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 736]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 704]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 672]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 640]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 608]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 576]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 544]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 512]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 480]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 448]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 416]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 384]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 352]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 320]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 288]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 256]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 224]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 192]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 160]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 128]
        vpaddq  ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 96]
        vpaddq  ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 64]
        vpaddq  ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 32]
        vpaddq  ymm3, ymm3, ymmword ptr [rdx + 8*rdi]
        # Advance the element offset by 128 (written as subtracting -128) and
        # count down 8 chunks; loop until the counter reaches zero.
        sub     rdi, -128
        add     rax, -8
        jne     .LBB0_8
.LBB0_9:                                # %middle.block.unr-lcssa
        # Skip the epilogue when no leftover 16-element chunks remain.
        test    r10, r10
        je      .LBB0_11
        .p2align        4, 0x90
.LBB0_10:                               # %vector.body.epil
                                        # =>This Inner Loop Header: Depth=1
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
        # Epilogue: one 16-element chunk per iteration.
        # rax = byte offset 8*rdi with bit 3 set; rdi only ever holds multiples
        # of 16 here, so the OR is equivalent to adding 8 (skipping element 0).
        lea     rax, [8*rdi]
        add     rdi, 16
; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
        or      rax, 8
        # dec r10 is the last flag-writing instruction before the jne below;
        # the loop branch depends on that ordering.
        dec     r10
; │││││││││││││└└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
        vpaddq  ymm0, ymm0, ymmword ptr [r9 + rax]
        vpaddq  ymm1, ymm1, ymmword ptr [r9 + rax + 32]
        vpaddq  ymm2, ymm2, ymmword ptr [r9 + rax + 64]
        vpaddq  ymm3, ymm3, ymmword ptr [r9 + rax + 96]
        jne     .LBB0_10
.LBB0_11:                               # %middle.block
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
        # Horizontal reduction: fold the four accumulators into ymm0, add its
        # high 128 bits onto the low 128 bits, then add the upper qword onto the
        # lower one and move the scalar result back into rax.
        vpaddq  ymm1, ymm1, ymm3
        vpaddq  ymm0, ymm0, ymm2
        vpaddq  ymm0, ymm0, ymm1
        vextracti128    xmm1, ymm0, 1
        vpaddq  xmm0, xmm0, xmm1
        vpshufd xmm1, xmm0, 238                 # xmm1 = xmm0[2,3,2,3]
        vpaddq  xmm0, xmm0, xmm1
        vmovq   rax, xmm0
        # If the vectorised count equals the remaining count, nothing is left
        # for the scalar loop.
        cmp     r8, rsi
        je      .LBB0_15
# %bb.12:
        # Scalar tail setup after the vector code: rdx = rsi + 1 is the 0-based
        # index of the next unprocessed element; rsi is a multiple of 16, so
        # `or rsi, 2` yields rsi + 2, the index after that.
        lea     rdx, [rsi + 1]
        or      rsi, 2
.LBB0_13:                               # %scalar.ph
        # rcx = length - rsi + 1 = number of scalar iterations to run.
        sub     rcx, rsi
        inc     rcx
        .p2align        4, 0x90
.LBB0_14:                               # %L40
                                        # =>This Inner Loop Header: Depth=1
; │││││││││││││└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
        # Scalar loop: add element rdx to the running sum, then advance both
        # indices (rdx takes the old rsi, rsi moves one further).
        add     rax, qword ptr [r9 + 8*rdx]
        mov     rdx, rsi
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
; ││││││││││││││┌ @ int.jl:87 within `+`
        inc     rsi
; ││││││││││││││└
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
        dec     rcx
; ││││││││││││││└
        jne     .LBB0_14
.LBB0_15:                               # %L55
; │└└└└└└└└└└└└└
        # Common exit for non-empty input; the sum is in rax. vzeroupper is
        # issued here because this exit is reachable after ymm registers were used.
        pop     rbp
        vzeroupper
        ret
.Lfunc_end0:
        .size   "julia_#58_2730", .Lfunc_end0-"julia_#58_2730"
; └
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits