Skip to content

Instantly share code, notes, and snippets.

@MasonProtter
Created November 23, 2023 16:15
Show Gist options
  • Select an option

  • Save MasonProtter/de3a6b98adff01f237453d61dc214f94 to your computer and use it in GitHub Desktop.

Select an option

Save MasonProtter/de3a6b98adff01f237453d61dc214f94 to your computer and use it in GitHub Desktop.

Revisions

  1. MasonProtter created this gist Nov 23, 2023.
    150 changes: 150 additions & 0 deletions sum_v1_9
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,150 @@
    julia> code_native(Tuple{Vector{Int}}) do v
    sum(v; init=0)
    end
    # -----------------------------------------------------------------------
    # Native x86-64 listing (AT&T operand order: source first, destination
    # last) printed by the code_native call above for the body
    # `sum(v; init=0)` with v::Vector{Int}.
    #
    # Lines beginning with `;` are source-location annotations (file:line and
    # enclosing function); lines beginning with `#` are assembler comments.
    #
    # In:    %rdi = pointer to the array object. The code reads the data
    #               pointer from 0(%rdi) and the element count from 8(%rdi).
    #               (Passing the argument in %rdi is consistent with the
    #               System V AMD64 convention -- not stated in the listing.)
    # Out:   %rax = sum of all elements; 0 when the array is empty.
    # Uses:  %rcx, %rdx, %rsi, %rdi, %r8, %r9, %ymm0-%ymm3, flags.
    #        %rbp is pushed on entry and popped before every return.
    #
    # Strategy:
    #   1. count == 0            -> return 0.
    #   2. acc = data[0]; count == 1 -> return acc.
    #   3. If at least 16 elements remain, add them 16 per iteration into
    #      four 256-bit accumulators (4 x 64-bit lanes each), then fold the
    #      16 lanes down to one scalar.
    #   4. Add whatever is left one element at a time.
    # Elements are addressed with a scale of 8 bytes throughout, and the adds
    # are plain addq/vpaddq: no overflow check appears anywhere in the code.
    # -----------------------------------------------------------------------
    .text
    .file "#12"
    .globl "julia_#12_1605" # -- Begin function julia_#12_1605
    .p2align 4, 0x90
    .type "julia_#12_1605",@function
    "julia_#12_1605": # @"julia_#12_1605"
    ; ┌ @ REPL[10]:2 within `#12`
    .cfi_startproc
    # %bb.0: # %top
    # Frame-pointer prologue. No stack locals are allocated after it.
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp
    ; │┌ @ reducedim.jl:994 within `sum`
    ; ││┌ @ reducedim.jl:994 within `#sum#808`
    ; │││┌ @ reducedim.jl:998 within `_sum`
    ; ││││┌ @ reducedim.jl:998 within `#_sum#810`
    ; │││││┌ @ reducedim.jl:999 within `_sum`
    ; ││││││┌ @ reducedim.jl:999 within `#_sum#811`
    ; │││││││┌ @ reducedim.jl:357 within `mapreduce`
    ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#801`
    ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
    ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
    ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
    ; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:893 within `iterate` @ array.jl:893
    ; ││││││││││││││┌ @ essentials.jl:10 within `length`
    # rcx = element count (field at offset 8 of the array object).
    movq 8(%rdi), %rcx
    ; ││││││││││││││└
    ; ││││││││││││││┌ @ int.jl:494 within `<` @ int.jl:487
    # Empty array: branch to the block that returns 0.
    testq %rcx, %rcx
    ; ││││││││││││││└
    je .LBB0_1
    # %bb.2: # %L17
    ; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
    # rdx = data pointer (field at offset 0); rax = data[0], the running sum.
    movq (%rdi), %rdx
    movq (%rdx), %rax
    ; │││││││││││││└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:893 within `iterate`
    ; ││││││││││││││┌ @ int.jl:494 within `<` @ int.jl:487
    # Exactly one element: rax already holds the result.
    cmpq $1, %rcx
    ; ││││││││││││││└
    je .LBB0_10
    # %bb.3: # %L40.preheader
    # r8 = count - 1 = elements still to add. Take the vector path only when
    # at least 16 remain (jae: unsigned compare).
    leaq -1(%rcx), %r8
    cmpq $16, %r8
    jae .LBB0_5
    # %bb.4:
    # Scalar-only path: rdi = 1 is the 0-based index of the next element to
    # add and rsi = rdi + 1. The array-object pointer in rdi is overwritten
    # here; it is not read again after this point.
    movl $2, %esi
    movl $1, %edi
    jmp .LBB0_8
    .LBB0_1:
    # Empty-array return: result 0. No ymm register has been written on this
    # path, and this return has no vzeroupper.
    xorl %eax, %eax
    ; │└└└└└└└└└└└└└
    popq %rbp
    .cfi_def_cfa %rsp, 8
    retq
    .LBB0_5: # %vector.ph
    ; │┌ @ reducedim.jl:994 within `sum`
    ; ││┌ @ reducedim.jl:994 within `#sum#808`
    ; │││┌ @ reducedim.jl:998 within `_sum`
    ; ││││┌ @ reducedim.jl:998 within `#_sum#810`
    ; │││││┌ @ reducedim.jl:999 within `_sum`
    ; ││││││┌ @ reducedim.jl:999 within `#_sum#811`
    ; │││││││┌ @ reducedim.jl:357 within `mapreduce`
    ; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#801`
    ; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
    ; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
    ; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
    ; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:893 within `iterate`
    # The return block just above switched the unwind CFA rule to %rsp+8;
    # this block is reached with the frame still set up, so the rule is put
    # back to %rbp+16.
    .cfi_def_cfa %rbp, 16
    # Vector preheader.
    #   r9  = (count - 1) rounded down to a multiple of 16
    #         = number of elements the vector loop will consume.
    #   ymm0 lane 0 = data[0] (the running sum so far), other lanes zero;
    #   ymm1..ymm3 = 0. Together these are the four partial-sum accumulators.
    #   rax is reused as the vector-loop element counter, starting at 0.
    #   rdi = r9 + 1 = index of the first element left for the scalar tail,
    #   rsi = rdi + 1.
    movq %r8, %r9
    andq $-16, %r9
    vmovq %rax, %xmm0
    vpxor %xmm1, %xmm1, %xmm1
    xorl %eax, %eax
    vpxor %xmm2, %xmm2, %xmm2
    vpxor %xmm3, %xmm3, %xmm3
    leaq 1(%r9), %rdi
    leaq 2(%r9), %rsi
    .p2align 4, 0x90
    .LBB0_6: # %vector.body
    # =>This Inner Loop Header: Depth=1
    ; │││││││││││││└
    ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
    ; │││││││││││││┌ @ reduce.jl:81 within `BottomRF`
    ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
    ; │││││││││││││││┌ @ int.jl:87 within `+`
    # Each iteration adds 16 consecutive elements, data[rax+1 .. rax+16]:
    # byte offsets 8/40/72/104 from rdx + rax*8 are four back-to-back 32-byte
    # groups, one per accumulator. The +8 skips data[0], which is already in
    # ymm0.
    vpaddq 8(%rdx,%rax,8), %ymm0, %ymm0
    vpaddq 40(%rdx,%rax,8), %ymm1, %ymm1
    vpaddq 72(%rdx,%rax,8), %ymm2, %ymm2
    vpaddq 104(%rdx,%rax,8), %ymm3, %ymm3
    addq $16, %rax
    cmpq %rax, %r9
    jne .LBB0_6
    # %bb.7: # %middle.block
    ; │││││││││││││└└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:893 within `iterate`
    # Horizontal reduction: fold the four accumulators into ymm0, add its
    # upper 128-bit half onto the lower half, then add the upper 64-bit lane
    # onto the lower lane. rax = scalar sum of everything consumed so far.
    vpaddq %ymm0, %ymm1, %ymm0
    vpaddq %ymm0, %ymm2, %ymm0
    vpaddq %ymm0, %ymm3, %ymm0
    vextracti128 $1, %ymm0, %xmm1
    vpaddq %xmm1, %xmm0, %xmm0
    vpshufd $238, %xmm0, %xmm1 # xmm1 = xmm0[2,3,2,3]
    vpaddq %xmm1, %xmm0, %xmm0
    vmovq %xmm0, %rax
    # If the vector loop consumed every remaining element (r9 == r8) there is
    # no tail to process.
    cmpq %r9, %r8
    je .LBB0_10
    .LBB0_8: # %scalar.ph
    # Scalar preheader, reached from both paths.
    #   rcx = count - rsi + 1 = number of elements still to add:
    #     from %bb.4       (rsi = 2):      count - 1
    #     from the vector  (rsi = r9 + 2): (count - 1) - r9
    subq %rsi, %rcx
    incq %rcx
    .p2align 4, 0x90
    .LBB0_9: # %L40
    # =>This Inner Loop Header: Depth=1
    ; │││││││││││││└
    ; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
    ; │││││││││││││┌ @ reduce.jl:81 within `BottomRF`
    ; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
    ; │││││││││││││││┌ @ int.jl:87 within `+`
    # Invariant at loop entry: rdi = index of the element to add, rsi = rdi+1,
    # rcx = iterations left (> 0). Each pass: rax += data[rdi], then both
    # indices advance by one and rcx counts down to zero.
    addq (%rdx,%rdi,8), %rax
    movq %rsi, %rdi
    ; │││││││││││││└└└
    ; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
    ; │││││││││││││┌ @ array.jl:893 within `iterate`
    ; ││││││││││││││┌ @ int.jl:87 within `+`
    incq %rsi
    ; ││││││││││││││└
    ; ││││││││││││││┌ @ int.jl:494 within `<` @ int.jl:487
    decq %rcx
    ; ││││││││││││││└
    jne .LBB0_9
    .LBB0_10: # %L55
    ; │└└└└└└└└└└└└└
    # Common return for every non-empty input; rax holds the sum. vzeroupper
    # runs here regardless of whether the ymm path was taken (the
    # one-element and scalar-only paths also arrive at this label).
    popq %rbp
    .cfi_def_cfa %rsp, 8
    vzeroupper
    retq
    .Lfunc_end0:
    .size "julia_#12_1605", .Lfunc_end0-"julia_#12_1605"
    .cfi_endproc
    ; └
    # -- End function
    .section ".note.GNU-stack","",@progbits