Skip to content

Instantly share code, notes, and snippets.

@MasonProtter
Created November 23, 2023 16:14
Show Gist options
  • Select an option

  • Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.

Select an option

Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.

Revisions

  1. MasonProtter created this gist Nov 23, 2023.
    212 changes: 212 additions & 0 deletions sum_v1_10
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,212 @@
    julia> code_native(Tuple{Vector{Int}}) do v
    sum(v; init=0)
    end
    # ---------------------------------------------------------------------
    # julia_#58_2730 -- x86-64 listing (Intel operand order: dst, src) for
    # the closure `#58`. Per the `;` source annotations, the whole chain
    # `sum` -> `mapreduce` -> `_foldl_impl` is inlined into this function.
    #
    # In:    rdi = pointer to the array object; the code reads the data
    #              pointer from [rdi] and the element count from [rdi + 8].
    # Out:   rax = sum of the 64-bit elements (0 when the count is 0).
    #              `add` / `vpaddq` wrap on overflow.
    # ABI:   argument in rdi / result in rax is consistent with System V
    #        AMD64 -- NOTE(review): the listing itself does not name the ABI.
    # Stack: push rbp / mov rbp, rsp frame only; no locals, no calls.
    #
    # Register roles:
    #   rcx  element count n; later reused as the scalar-loop trip count
    #   r9   data pointer
    #   r8   n - 1 (elements left once element 0 has been loaded into rax)
    #   rsi  (n - 1) rounded down to a multiple of 16 = elements consumed by
    #        the vector loops; later the "index after next" of the scalar loop
    #   rdi  reused as the element offset inside the vector loops
    #   rdx  scratch; in the scalar loop, index of the element added next
    #   r10  16-element blocks left for the epilogue loop (0..7)
    #   ymm0-ymm3  four independent 4 x i64 partial sums
    #
    # Shape: element 0 -> rax. If n - 1 >= 16: a 128-elements-per-iteration
    # vector loop (.LBB0_8), a 16-elements-per-iteration epilogue (.LBB0_10)
    # and a horizontal reduction (.LBB0_11). Whatever is still left is added
    # one element at a time in .LBB0_14.
    # ---------------------------------------------------------------------
    .text
    .file "#58"
    .globl "julia_#58_2730" # -- Begin function julia_#58_2730
    .p2align 4, 0x90
    .type "julia_#58_2730",@function
    "julia_#58_2730": # @"julia_#58_2730"
    ; ┌ @ REPL[24]:2 within `#58`
    # %bb.0: # %top
    push rbp
    ; │┌ @ reducedim.jl:996 within `sum`
    ; ││┌ @ reducedim.jl:996 within `#sum#828`
    ; │││┌ @ reducedim.jl:1000 within `_sum`
    ; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
    ; │││││┌ @ reducedim.jl:1001 within `_sum`
    ; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
    ; │││││││┌ @ reducedim.jl:357 within `mapreduce`
    ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
    ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
    ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
    ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
    ; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate` @ array.jl:943
    ; ││││││││││││││┌ @ essentials.jl:10 within `length`
    # rcx = n, the element count stored at [rdi + 8].
    mov rcx, qword ptr [rdi + 8]
    mov rbp, rsp
    ; ││││││││││││││└
    ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
    # n == 0: nothing to add, take the "return 0" exit.
    test rcx, rcx
    ; ││││││││││││││└
    je .LBB0_1
    # %bb.2: # %L19
    ; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
    # r9 = data pointer; rax = element 0, which becomes the running total.
    mov r9, qword ptr [rdi]
    mov rax, qword ptr [r9]
    ; │││││││││││││└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate`
    ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
    # n == 1: element 0 alone is the result.
    cmp rcx, 1
    ; ││││││││││││││└
    je .LBB0_15
    # %bb.3: # %L40.preheader
    # r8 = n - 1 elements still to add; the vector path needs at least 16.
    lea r8, [rcx - 1]
    cmp r8, 16
    jae .LBB0_5
    # %bb.4:
    # Scalar-only entry: rdx = 1 (next element to add), rsi = 2 (the one
    # after it).
    mov esi, 2
    mov edx, 1
    jmp .LBB0_13
    .LBB0_1:
    # Empty input: result is 0. No ymm register has been touched on this
    # path, so this exit has no vzeroupper.
    xor eax, eax
    ; │└└└└└└└└└└└└└
    pop rbp
    ret
    .LBB0_5: # %vector.ph
    ; │┌ @ reducedim.jl:996 within `sum`
    ; ││┌ @ reducedim.jl:996 within `#sum#828`
    ; │││┌ @ reducedim.jl:1000 within `_sum`
    ; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
    ; │││││┌ @ reducedim.jl:1001 within `_sum`
    ; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
    ; │││││││┌ @ reducedim.jl:357 within `mapreduce`
    ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
    ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
    ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
    ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
    ; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate`
    # rsi = (n - 1) & -16: how many elements the vector loops will consume.
    mov rsi, r8
    and rsi, -16
    # Seed lane 0 of accumulator ymm0 with element 0 (the running total).
    vmovq xmm0, rax
    # rax = (rsi - 16) / 16 + 1 = number of 16-element blocks.
    lea rdx, [rsi - 16]
    mov rax, rdx
    shr rax, 4
    inc rax
    # r10 = block count mod 8 = blocks left for the epilogue loop.
    mov r10d, eax
    and r10d, 7
    # rsi - 16 >= 112 means at least 8 blocks, enough for one pass of the
    # 8-block main loop.
    cmp rdx, 112
    jae .LBB0_7
    # %bb.6:
    # Fewer than 8 blocks: zero accumulators 1-3 and the element offset,
    # then skip the main loop.
    vpxor xmm1, xmm1, xmm1
    xor edi, edi
    vpxor xmm2, xmm2, xmm2
    vpxor xmm3, xmm3, xmm3
    jmp .LBB0_9
    .LBB0_7: # %vector.ph.new
    # rax = block count rounded down to a multiple of 8 (main-loop counter).
    and rax, -8
    # rdx = r9 + 1000 is a biased base: with the -992..0 displacements used
    # below, the first load is at r9 + 8, i.e. element 1.
    lea rdx, [r9 + 1000]
    vpxor xmm1, xmm1, xmm1
    xor edi, edi
    vpxor xmm2, xmm2, xmm2
    vpxor xmm3, xmm3, xmm3
    .p2align 4, 0x90
    .LBB0_8: # %vector.body
    # =>This Inner Loop Header: Depth=1
    ; │││││││││││││└
    ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
    ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
    ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
    ; │││││││││││││││┌ @ int.jl:87 within `+`
    # 32 loads x 4 lanes = 128 elements per iteration, rotating through
    # ymm0..ymm3 so consecutive adds target different accumulators.
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 992]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 960]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 928]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 896]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 864]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 832]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 800]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 768]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 736]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 704]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 672]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 640]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 608]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 576]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 544]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 512]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 480]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 448]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 416]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 384]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 352]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 320]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 288]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 256]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 224]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 192]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 160]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 128]
    vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 96]
    vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 64]
    vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 32]
    vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi]
    # rdi += 128 elements. Written as `sub -128`, presumably because -128
    # fits a sign-extended 8-bit immediate while +128 does not.
    sub rdi, -128
    # 8 blocks consumed; loop until the rounded block count reaches 0.
    add rax, -8
    jne .LBB0_8
    .LBB0_9: # %middle.block.unr-lcssa
    # r10 == 0: no leftover 16-element blocks, go straight to the reduction.
    test r10, r10
    je .LBB0_11
    .p2align 4, 0x90
    .LBB0_10: # %vector.body.epil
    # =>This Inner Loop Header: Depth=1
    ; │││││││││││││└└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate`
    # rax = byte offset of the current block (8 * rdi); rdi then advances by
    # one 16-element block.
    lea rax, [8*rdi]
    add rdi, 16
    ; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
    # `or 8` skips element 0. rdi only moves in steps of 128 and 16, so
    # 8 * rdi is a multiple of 128 and the OR acts as +8.
    or rax, 8
    # dec sets ZF for the jne at the bottom of the loop; the vpaddq
    # instructions in between do not modify flags.
    dec r10
    ; │││││││││││││└└
    ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
    ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
    ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
    ; │││││││││││││││┌ @ int.jl:87 within `+`
    vpaddq ymm0, ymm0, ymmword ptr [r9 + rax]
    vpaddq ymm1, ymm1, ymmword ptr [r9 + rax + 32]
    vpaddq ymm2, ymm2, ymmword ptr [r9 + rax + 64]
    vpaddq ymm3, ymm3, ymmword ptr [r9 + rax + 96]
    jne .LBB0_10
    .LBB0_11: # %middle.block
    ; │││││││││││││└└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate`
    # Horizontal reduction: merge the four accumulators into ymm0, then fold
    # 256 -> 128 -> 64 bits. rax ends up as element 0 plus every element the
    # vector loops consumed.
    vpaddq ymm1, ymm1, ymm3
    vpaddq ymm0, ymm0, ymm2
    vpaddq ymm0, ymm0, ymm1
    vextracti128 xmm1, ymm0, 1
    vpaddq xmm0, xmm0, xmm1
    vpshufd xmm1, xmm0, 238 # xmm1 = xmm0[2,3,2,3]
    vpaddq xmm0, xmm0, xmm1
    vmovq rax, xmm0
    # r8 == rsi: n - 1 was a multiple of 16, so nothing is left for the
    # scalar loop.
    cmp r8, rsi
    je .LBB0_15
    # %bb.12:
    # rdx = rsi + 1 = first unprocessed index; `or 2` yields rsi + 2 because
    # rsi is a multiple of 16.
    lea rdx, [rsi + 1]
    or rsi, 2
    .LBB0_13: # %scalar.ph
    # rcx = n - rsi + 1 = number of elements still to add (rsi is the index
    # after the next one, so this equals n - rdx).
    sub rcx, rsi
    inc rcx
    .p2align 4, 0x90
    .LBB0_14: # %L40
    # =>This Inner Loop Header: Depth=1
    ; │││││││││││││└
    ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
    ; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
    ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
    ; │││││││││││││││┌ @ int.jl:87 within `+`
    # total += element[rdx]; then rdx takes the old rsi and rsi moves on.
    add rax, qword ptr [r9 + 8*rdx]
    mov rdx, rsi
    ; │││││││││││││└└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:943 within `iterate`
    ; ││││││││││││││┌ @ int.jl:87 within `+`
    inc rsi
    ; ││││││││││││││└
    ; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
    dec rcx
    ; ││││││││││││││└
    jne .LBB0_14
    .LBB0_15: # %L55
    ; │└└└└└└└└└└└└└
    # Common exit with the result in rax. vzeroupper clears the upper ymm
    # halves the vector path may have written; the n == 1 path also lands
    # here without having used them.
    pop rbp
    vzeroupper
    ret
    .Lfunc_end0:
    .size "julia_#58_2730", .Lfunc_end0-"julia_#58_2730"
    ; └
    # -- End function
    .section ".note.GNU-stack","",@progbits