#+BEGIN_SRC jupyter-julia
using CUDA

# Fixed-length string whose length N is part of the type and whose characters
# are stored inline as a tuple of `Char`.
# NOTE(review): the type is declared `<: AbstractString`, but only `String`,
# `show` and `*` are defined here; other generic string functions are not
# implemented by this snippet.
struct StaticString{N} <: AbstractString
    chars::NTuple{N, Char}
end

# String macro: s"abc" expands to `StaticString{3}(('a', 'b', 'c'))`.
# The length is computed from the literal at macro-expansion time.
macro s_str(s)
    chars = tuple(collect(s)...)
    N = length(chars)
    return esc(:(StaticString{$N}($chars)))
end

# Conversion back to an ordinary `String`.
Base.String(s::StaticString) = String(collect(s.chars))

# Print as StaticString{N}("...").
function Base.show(io::IO, s::StaticString{N}) where {N}
    return print(io, "StaticString{$N}(\"$(String(s))\")")
end

# Concatenation: the result length N + M is carried in the result type.
function Base.:(*)(s1::StaticString{N}, s2::StaticString{M}) where {N, M}
    return StaticString{N + M}((s1.chars..., s2.chars...))
end

# Every element of one array must have the same length so that the array has a
# single concrete element type. "hello  " is padded with two trailing spaces to
# 7 characters to match "goodbye"; the recorded kernel signature in the results
# accordingly shows StaticString{4} .* StaticString{7} -> StaticString{11}.
@device_code_sass cu([s"abc ", s"123 "]) .* cu([s"hello  ", s"goodbye"])
#+END_SRC

#+RESULTS:
#+BEGIN_EXAMPLE
// PTX CompilerJob of kernel broadcast_kernel(CUDA.CuKernelContext, CuDeviceArray{StaticString{11},1,CUDA.AS.Global}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{CuDeviceArray{StaticString{4},1,CUDA.AS.Global},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Extruded{CuDeviceArray{StaticString{7},1,CUDA.AS.Global},Tuple{Bool},Tuple{Int64}}}}, Int64) for sm_75 .headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM75)" .elftype @"ET_EXEC" //--------------------- .text._Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64 -------------------------- .section .text._Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64,"ax",@progbits .sectioninfo @"SHI_REGISTERS=40" .align 128 .global 
_Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64 .type _Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64,@function .size _Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64,(.L_27 - _Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64) .other _Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64,@"STO_CUDA_ENTRY STV_DEFAULT" 
_Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64: .text._Z27julia_broadcast_kernel_386615CuKernelContext13CuDeviceArrayI12StaticStringILi11EELi1E6GlobalE11BroadcastedIv5TupleI5OneToI5Int64EE2__5TupleI8ExtrudedI13CuDeviceArrayI12StaticStringILi4EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EE8ExtrudedI13CuDeviceArrayI12StaticStringILi7EELi1E6GlobalE5TupleI4BoolE5TupleI5Int64EEEE5Int64: IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x1b8] ; IMAD.MOV.U32 R2, RZ, RZ, c[0x0][0x1bc] ; ISETP.GE.U32.AND P0, PT, R0, 0x1, PT ; ISETP.GE.AND.EX P0, PT, R2, RZ, PT, P0 ; @!P0 EXIT ; S2R R6, SR_TID.X ; ULDC.U8 UR4, c[0x0][0x180] ; ISETP.LT.U32.AND P0, PT, RZ, c[0x0][0x1b8], PT ; ULOP3.LUT UR4, UR4, 0xff, URZ, 0xc0, !UP7 ; S2R R9, SR_CTAID.X ; ULDC.U8 UR6, c[0x0][0x1a0] ; ISETP.GT.AND.EX P0, PT, R2, RZ, PT, P0 ; ULOP3.LUT UR6, UR6, 0xff, URZ, 0xc0, !UP7 ; ISETP.NE.AND P1, PT, RZ, UR4, PT ; SEL R0, RZ, c[0x0][0x1b8], !P0 ; SEL R2, RZ, c[0x0][0x1bc], !P0 ; IADD3 R6, R6, 0x1, RZ ; @!P1 BRA `(.L_2) ; IMAD.MOV.U32 R7, RZ, RZ, RZ ; UMOV UR4, URZ ; IMAD.MOV.U32 R8, RZ, RZ, c[0x0][0x168] ; ISETP.NE.AND P1, PT, RZ, UR6, PT ; IMAD.WIDE.U32 R6, R9, c[0x0][0x0], R6 ; MOV R9, c[0x0][0x16c] ; ISETP.GT.U32.AND P0, PT, R6.reuse, c[0x0][0x160], PT ; IADD3 R16, R7, UR4, RZ ; IADD3 R3, P2, R6, -0x1, RZ ; ISETP.GT.AND.EX P0, PT, R16.reuse, c[0x0][0x164], PT, P0 ; LEA R10, P3, R3.reuse, c[0x0][0x178], 0x4 ; IMAD.WIDE.U32 R8, R3, 0x2c, R8 ; IADD3.X R4, R16, -0x1, RZ, P2, !PT ; @P0 EXIT ; IMAD.MOV.U32 R5, RZ, RZ, c[0x0][0x0] ; LEA.HI.X R7, R3, c[0x0][0x17c], R4, 0x4, P3 ; IMAD R31, R4, 0x2c, RZ ; UMOV UR4, URZ ; IMAD.WIDE.U32 R4, R5, c[0x0][0xc], RZ ; IADD3 R14, P2, R8, 0x14, RZ ; IADD3 R10, P0, R10, 0x8, RZ ; 
IMAD.MOV.U32 R33, RZ, RZ, R6 ; IMAD.X R31, R9, 0x1, R31, P2 ; IADD3 R37, R5, UR4, RZ ; IMAD.X R7, RZ, RZ, R7, P0 ; SHF.L.U64.HI R35, R4, 0x4, R37 ; .L_3: SEL R3, R33, c[0x0][0x1a8], P1 ; IMAD.MOV.U32 R9, RZ, RZ, c[0x0][0x19c] ; MOV R8, c[0x0][0x198] ; SEL R6, R16, c[0x0][0x1ac], P1 ; IMAD.WIDE.U32 R8, R3, 0x1c, R8 ; IMAD R11, R6, 0x1c, RZ ; IMAD.MOV.U32 R6, RZ, RZ, R10 ; IMAD.IADD R9, R9, 0x1, R11 ; LDG.E.SYS R3, [R6+-0x8] ; LDG.E.SYS R11, [R6+-0x4] ; LDG.E.SYS R13, [R6] ; LDG.E.SYS R15, [R6+0x4] ; LDG.E.SYS R17, [R8+-0x1c] ; LDG.E.SYS R19, [R8+-0x18] ; LDG.E.SYS R21, [R8+-0x14] ; LDG.E.SYS R23, [R8+-0x10] ; LDG.E.SYS R25, [R8+-0xc] ; LDG.E.SYS R27, [R8+-0x8] ; LDG.E.SYS R29, [R8+-0x4] ; IADD3 R10, P0, R0, -0x1, RZ ; IADD3 R0, P2, R33, R4, RZ ; IADD3.X R12, R2, -0x1, RZ, P0, !PT ; ISETP.NE.U32.AND P0, PT, R10, RZ, PT ; IMAD.X R2, R16, 0x1, R37, P2 ; MOV R9, R31 ; IMAD.MOV.U32 R8, RZ, RZ, R14 ; ISETP.NE.AND.EX P0, PT, R12, RZ, PT, P0 ; STG.E.SYS [R8+-0x14], R3 ; STG.E.SYS [R8+-0x10], R11 ; STG.E.SYS [R8+-0xc], R13 ; LEA R3, P2, R4, R6, 0x4 ; STG.E.SYS [R8+-0x8], R15 ; STG.E.SYS [R8+-0x4], R17 ; STG.E.SYS [R8], R19 ; STG.E.SYS [R8+0x4], R21 ; STG.E.SYS [R8+0x8], R23 ; STG.E.SYS [R8+0xc], R25 ; STG.E.SYS [R8+0x10], R27 ; STG.E.SYS [R8+0x14], R29 ; @!P0 EXIT ; IMAD.MOV.U32 R33, RZ, RZ, R0 ; IMAD.MOV.U32 R16, RZ, RZ, R2 ; IMAD.WIDE.U32 R8, R4, 0x2c, R8 ; ISETP.GT.U32.AND P0, PT, R33, c[0x0][0x160], PT ; IMAD R31, R37, 0x2c, RZ ; ISETP.GT.AND.EX P0, PT, R16, c[0x0][0x164], PT, P0 ; IMAD.X R6, R7, 0x1, R35, P2 ; IMAD.MOV.U32 R0, RZ, RZ, R10 ; IADD3 R31, R9, R31, RZ ; IMAD.MOV.U32 R14, RZ, RZ, R8 ; MOV R7, R6 ; IMAD.MOV.U32 R2, RZ, RZ, R12 ; IMAD.MOV.U32 R10, RZ, RZ, R3 ; @!P0 BRA `(.L_3) ; EXIT ; .L_2: IMAD.MOV.U32 R7, RZ, RZ, RZ ; UMOV UR8, URZ ; ISETP.NE.AND P1, PT, RZ, UR6, PT ; ULDC UR5, c[0x0][0x188] ; IMAD.WIDE.U32 R8, R9, c[0x0][0x0], R6 ; ULDC UR4, c[0x0][0x178] ; ULEA UR4, UP0, UR5, UR4, 0x4 ; IMAD.MOV.U32 R6, RZ, RZ, c[0x0][0x168] ; ULDC UR7, c[0x0][0x18c] ; 
IMAD.MOV.U32 R7, RZ, RZ, c[0x0][0x16c] ; ISETP.GT.U32.AND P0, PT, R8, c[0x0][0x160], PT ; IADD3 R12, R9, UR8, RZ ; ULDC UR8, c[0x0][0x17c] ; IMAD.WIDE.U32 R6, R8, 0x2c, R6 ; ULEA.HI.X UR5, UR5, UR8, UR7, 0x4, UP0 ; ISETP.GT.AND.EX P0, PT, R12, c[0x0][0x164], PT, P0 ; @P0 EXIT ; IMAD.MOV.U32 R4, RZ, RZ, c[0x0][0x0] ; IADD3 R10, P0, R6, -0x18, RZ ; IMAD R29, R12, 0x2c, RZ ; BMOV.32.CLEAR RZ, B0 ; IMAD.WIDE.U32 R4, R4, c[0x0][0xc], RZ ; UMOV UR6, URZ ; BSSY B0, `(.L_4) ; IADD3.X R29, R7, -0x1, R29, P0, !PT ; MOV R31, R8 ; IADD3 R33, R5, UR6, RZ ; .L_5: SEL R3, R31, c[0x0][0x1a8], P1 ; IMAD.MOV.U32 R6, RZ, RZ, c[0x0][0x198] ; SEL R8, R12, c[0x0][0x1ac], P1 ; IMAD.MOV.U32 R7, RZ, RZ, c[0x0][0x19c] ; LDG.E.SYS R11, [UR4+-0x8] ; IMAD.WIDE.U32 R6, R3, 0x1c, R6 ; LDG.E.SYS R13, [UR4+-0x4] ; IMAD R9, R8, 0x1c, RZ ; LDG.E.SYS R3, [UR4+-0x10] ; IMAD.IADD R7, R7, 0x1, R9 ; LDG.E.SYS R9, [UR4+-0xc] ; LDG.E.SYS R15, [R6+-0x1c] ; LDG.E.SYS R17, [R6+-0x18] ; LDG.E.SYS R19, [R6+-0x14] ; LDG.E.SYS R21, [R6+-0x10] ; LDG.E.SYS R23, [R6+-0xc] ; LDG.E.SYS R25, [R6+-0x8] ; LDG.E.SYS R27, [R6+-0x4] ; IADD3 R8, P0, R0, -0x1, RZ ; IADD3.X R14, R2, -0x1, RZ, P0, !PT ; ISETP.NE.U32.AND P0, PT, R8, RZ, PT ; ISETP.NE.AND.EX P0, PT, R14, RZ, PT, P0 ; IMAD.MOV.U32 R6, RZ, RZ, R10 ; MOV R7, R29 ; IADD3 R0, P2, R31, R4, RZ ; IMAD.X R2, R12, 0x1, R33, P2 ; STG.E.SYS [R6+-0xc], R11 ; STG.E.SYS [R6+-0x8], R13 ; STG.E.SYS [R6+-0x14], R3 ; STG.E.SYS [R6+-0x10], R9 ; STG.E.SYS [R6+-0x4], R15 ; STG.E.SYS [R6], R17 ; STG.E.SYS [R6+0x4], R19 ; STG.E.SYS [R6+0x8], R21 ; STG.E.SYS [R6+0xc], R23 ; STG.E.SYS [R6+0x10], R25 ; STG.E.SYS [R6+0x14], R27 ; @!P0 EXIT ; IMAD.MOV.U32 R31, RZ, RZ, R0 ; IMAD.MOV.U32 R12, RZ, RZ, R2 ; IMAD.WIDE.U32 R6, R4, 0x2c, R6 ; ISETP.GT.U32.AND P0, PT, R31, c[0x0][0x160], PT ; IMAD R29, R33, 0x2c, RZ ; ISETP.GT.AND.EX P0, PT, R12, c[0x0][0x164], PT, P0 ; IMAD.MOV.U32 R0, RZ, RZ, R8 ; MOV R10, R6 ; IMAD.IADD R29, R29, 0x1, R7 ; IMAD.MOV.U32 R2, RZ, RZ, R14 ; @!P0 BRA `(.L_5) ; BSYNC 
B0 ; .L_4: EXIT ; .L_6: BRA `(.L_6); .L_27: #+END_EXAMPLE