Skip to content

Instantly share code, notes, and snippets.

@folkertdev
Created April 16, 2025 14:40
Show Gist options
  • Select an option

  • Save folkertdev/520d85934b51a1dcea85552b2f4a83fb to your computer and use it in GitHub Desktop.

Select an option

Save folkertdev/520d85934b51a1dcea85552b2f4a83fb to your computer and use it in GitHub Desktop.

Revisions

  1. folkertdev created this gist Apr 16, 2025.
    53 changes: 53 additions & 0 deletions main.rs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,53 @@
    /// Number of times each benchmark variant copies and rewrites the table.
    const N: usize = 10000;

    /// Benchmark driver: runs either the `scalar` or the `simd` variant `N` times,
    /// each time on a fresh copy of the input table.
    ///
    /// The variant is selected by the first command-line argument. Any other
    /// argument (or none) prints a usage error to stderr and exits with status 2.
    fn main() {
        let mut args = std::env::args().skip(1); // skip the program name

        // just get a bunch of arbitrary values cheaply
        const INPUT: &[u8] = include_bytes!(
            "/home/folkertdev/.cargo/registry/cache/github.com-1ecc6299db9ec823/clap-4.2.7.crate"
        );
        // SAFETY: every bit pattern is a valid `u16`, so reinterpreting the aligned
        // middle part of a byte slice as `u16` values is sound.
        let (_, table, _) = unsafe { INPUT.align_to::<u16>() };

        // Hide the constant from the optimizer so it cannot be folded into the loops.
        let wsize = std::hint::black_box(42);

        match args.next().as_deref() {
            Some("scalar") => {
                for _ in 0..N {
                    let mut table = table.to_vec();
                    scalar(&mut table, wsize)
                }
            }
            Some("simd") => {
                // `simd` is compiled with AVX2 enabled; running it on a CPU without
                // AVX2 is undefined behaviour, so verify support once up front.
                if !std::is_x86_feature_detected!("avx2") {
                    eprintln!("The 'simd' benchmark requires a CPU with AVX2 support.");
                    std::process::exit(1);
                }

                for _ in 0..N {
                    let mut table = table.to_vec();
                    // SAFETY: AVX2 availability was checked at runtime just above.
                    unsafe { simd(&mut table, wsize) }
                }
            }
            _ => {
                // A wrong argument is a user error, not an impossible state, so
                // report it instead of panicking through `unreachable!`.
                eprintln!("Unexpected argument. Only 'scalar' or 'simd' are allowed.");
                std::process::exit(2);
            }
        }
    }


    /// Subtracts `wsize` from every entry of `table` in place, clamping at zero.
    ///
    /// Entries smaller than `wsize` become `0` instead of wrapping around.
    pub fn scalar(table: &mut [u16], wsize: u16) {
        table
            .iter_mut()
            .for_each(|entry| *entry = entry.saturating_sub(wsize));
    }

    /// Subtracts `wsize` from every entry of `table` in place, clamping at zero.
    ///
    /// The table is walked in fixed-size chunks of `N` elements so the inner loop
    /// has a compile-time-known trip count. Any trailing elements that do not fill
    /// a whole chunk (`table.len() % N`) are processed afterwards, so the result
    /// matches a plain element-wise pass for every table length.
    ///
    /// # Panics
    ///
    /// Panics if `N` is `0`, because `chunks_exact_mut` rejects a zero chunk size.
    // Forced inlining places the loop body directly inside the caller; presumably
    // this is what lets a `#[target_feature]` caller compile it with its own
    // enabled features.
    #[inline(always)]
    fn generic_slide_hash_chain<const N: usize>(table: &mut [u16], wsize: u16) {
        let mut chunks = table.chunks_exact_mut(N);

        for chunk in chunks.by_ref() {
            for m in chunk.iter_mut() {
                *m = m.saturating_sub(wsize);
            }
        }

        // `chunks_exact_mut` skips the tail that is shorter than `N`; handle it
        // here so no entry is left unmodified.
        for m in chunks.into_remainder() {
            *m = m.saturating_sub(wsize);
        }
    }

    /// AVX2-enabled variant: subtracts `wsize` from the entries of `table` in place
    /// (saturating at zero) by delegating to `generic_slide_hash_chain` with a
    /// chunk width of 64 elements.
    ///
    /// # Safety
    ///
    /// This function is compiled with the `avx2` target feature enabled, so the
    /// caller must ensure the running CPU supports AVX2.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn simd(table: &mut [u16], wsize: u16) {
        // 64 `u16` values are 1024 bits, i.e. four 256-bit vectors per iteration.
        // That appears to be the optimal amount for AVX2.
        const CHUNK_WIDTH: usize = 64;

        generic_slide_hash_chain::<CHUNK_WIDTH>(table, wsize);
    }
    23 changes: 23 additions & 0 deletions results.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,23 @@
    Benchmarked using https://github.com/andrewrk/poop:

    ```
    > poop "target/release/playground scalar" "target/release/playground simd"
    Benchmark 1 (277 runs): target/release/playground scalar
    measurement mean ± σ min … max outliers delta
    wall_time 18.0ms ± 540us 17.4ms … 23.6ms 17 ( 6%) 0%
    peak_rss 2.14MB ± 61.5KB 2.10MB … 2.23MB 0 ( 0%) 0%
    cpu_cycles 77.4M ± 2.15M 75.7M … 101M 16 ( 6%) 0%
    instructions 157M ± 292 157M … 157M 4 ( 1%) 0%
    cache_references 41.8M ± 862K 36.6M … 43.4M 29 (10%) 0%
    cache_misses 251K ± 62.7K 217K … 710K 50 (18%) 0%
    branch_misses 14.0K ± 28.8 13.9K … 14.1K 6 ( 2%) 0%
    Benchmark 2 (300 runs): target/release/playground simd
    measurement mean ± σ min … max outliers delta
    wall_time 16.6ms ± 328us 16.0ms … 19.0ms 17 ( 6%) ⚡ - 7.6% ± 0.4%
    peak_rss 2.15MB ± 63.5KB 2.10MB … 2.23MB 0 ( 0%) + 0.3% ± 0.5%
    cpu_cycles 71.3M ± 1.31M 70.2M … 81.4M 17 ( 6%) ⚡ - 8.0% ± 0.4%
    instructions 68.1M ± 281 68.1M … 68.1M 1 ( 0%) ⚡ - 56.7% ± 0.0%
    cache_references 39.6M ± 812K 31.9M … 44.1M 22 ( 7%) ⚡ - 5.4% ± 0.3%
    cache_misses 223K ± 52.9K 152K … 507K 64 (21%) ⚡ - 11.1% ± 3.8%
    branch_misses 14.1K ± 578 13.9K … 24.0K 5 ( 2%) + 0.2% ± 0.5%
    ```