Created
April 16, 2025 14:40
-
-
Save folkertdev/520d85934b51a1dcea85552b2f4a83fb to your computer and use it in GitHub Desktop.
Revisions
-
folkertdev created this gist
Apr 16, 2025 .There are no files selected for viewing
// Micro-benchmark: subtract `wsize` (saturating) from every entry of a `u16`
// table, once with the default code generation (`scalar`) and once with the
// same loop compiled with AVX2 enabled (`simd`).
//
// Usage: playground <scalar|simd> [input-file]

/// Number of times the table is copied and processed per run.
const N: usize = 10000;

/// Input file used when no path is given on the command line. Its bytes are
/// only a cheap source of arbitrary values. The path is machine-specific, so
/// pass a different file as the second argument on other machines.
const DEFAULT_INPUT: &str =
    "/home/folkertdev/.cargo/registry/cache/github.com-1ecc6299db9ec823/clap-4.2.7.crate";

/// Which implementation the benchmark should exercise.
enum Mode {
    Scalar,
    Simd,
}

fn main() {
    let mut args = std::env::args().skip(1); // skip the program name

    // Bad user input is an expected condition, so report it and exit instead
    // of panicking.
    let mode = match args.next().as_deref() {
        Some("scalar") => Mode::Scalar,
        Some("simd") => Mode::Simd,
        _ => {
            eprintln!("Unexpected argument. Only 'scalar' or 'simd' are allowed.");
            std::process::exit(2);
        }
    };

    // just get a bunch of arbitrary values cheaply
    let path = args.next().unwrap_or_else(|| DEFAULT_INPUT.to_owned());
    let bytes = match std::fs::read(&path) {
        Ok(bytes) => bytes,
        Err(err) => {
            eprintln!("Failed to read input file '{}': {}", path, err);
            std::process::exit(1);
        }
    };
    let table = bytes_to_table(&bytes);

    // `black_box` keeps the compiler from specialising the loops for the
    // constant 42.
    let wsize: u16 = std::hint::black_box(42);

    match mode {
        Mode::Scalar => run_scalar(&table, wsize),
        Mode::Simd => run_simd(&table, wsize),
    }
}

/// Reinterprets `bytes` as native-endian `u16` values; a trailing odd byte is
/// ignored.
fn bytes_to_table(bytes: &[u8]) -> Vec<u16> {
    bytes
        .chunks_exact(2)
        .map(|pair| u16::from_ne_bytes([pair[0], pair[1]]))
        .collect()
}

/// Runs [`scalar`] on a fresh copy of `table`, `N` times.
fn run_scalar(table: &[u16], wsize: u16) {
    for _ in 0..N {
        let mut work = table.to_vec();
        scalar(&mut work, wsize);
        // Observe the result so the work cannot be optimised away.
        std::hint::black_box(&work);
    }
}

/// Runs [`simd`] on a fresh copy of `table`, `N` times.
///
/// Exits with an error message when the CPU does not support AVX2.
#[cfg(target_arch = "x86_64")]
fn run_simd(table: &[u16], wsize: u16) {
    if !is_x86_feature_detected!("avx2") {
        eprintln!("The 'simd' benchmark requires a CPU with AVX2 support.");
        std::process::exit(1);
    }

    for _ in 0..N {
        let mut work = table.to_vec();
        // SAFETY: AVX2 support was verified at runtime just above.
        unsafe { simd(&mut work, wsize) };
        // Observe the result so the work cannot be optimised away.
        std::hint::black_box(&work);
    }
}

/// Fallback for targets where the AVX2 implementation does not exist.
#[cfg(not(target_arch = "x86_64"))]
fn run_simd(_table: &[u16], _wsize: u16) {
    eprintln!("The 'simd' benchmark is only available on x86_64.");
    std::process::exit(1);
}

/// Subtracts `wsize` from every entry of `table`, clamping at zero.
pub fn scalar(table: &mut [u16], wsize: u16) {
    for m in table.iter_mut() {
        *m = m.saturating_sub(wsize);
    }
}

/// Subtracts `wsize` from every entry of `table`, clamping at zero, working
/// through the table in chunks of `CHUNK` elements.
///
/// Elements left over when `table.len()` is not a multiple of `CHUNK` are
/// processed as well, so the result always equals that of [`scalar`].
///
/// # Panics
///
/// Panics if `CHUNK` is zero (`chunks_exact_mut` rejects a zero chunk size).
// `inline(always)` so the loop is compiled inside the `#[target_feature]`
// caller and can use that caller's instruction set.
#[inline(always)]
fn generic_slide_hash_chain<const CHUNK: usize>(table: &mut [u16], wsize: u16) {
    let mut chunks = table.chunks_exact_mut(CHUNK);

    for chunk in chunks.by_ref() {
        for m in chunk.iter_mut() {
            *m = m.saturating_sub(wsize);
        }
    }

    // The tail that does not fill a whole chunk.
    for m in chunks.into_remainder() {
        *m = m.saturating_sub(wsize);
    }
}

/// AVX2 build of the saturating-subtract pass; same result as [`scalar`].
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2, e.g. by checking
/// `is_x86_feature_detected!("avx2")`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd(table: &mut [u16], wsize: u16) {
    // 64 means that 4 256-bit values can be processed per iteration.
    // That appears to be the optimal amount for avx2.
    generic_slide_hash_chain::<64>(table, wsize);
}
Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change @@ -0,0 +1,23 @@

using https://github.com/andrewrk/poop

```
> poop "target/release/playground scalar" "target/release/playground simd"
Benchmark 1 (277 runs): target/release/playground scalar
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          18.0ms ±  540us    17.4ms … 23.6ms         17 ( 6%)        0%
  peak_rss           2.14MB ± 61.5KB    2.10MB … 2.23MB          0 ( 0%)        0%
  cpu_cycles         77.4M  ± 2.15M     75.7M  …  101M          16 ( 6%)        0%
  instructions        157M  ±  292       157M  …  157M           4 ( 1%)        0%
  cache_references   41.8M  ±  862K     36.6M  … 43.4M          29 (10%)        0%
  cache_misses        251K  ± 62.7K      217K  …  710K          50 (18%)        0%
  branch_misses      14.0K  ± 28.8      13.9K  … 14.1K           6 ( 2%)        0%
Benchmark 2 (300 runs): target/release/playground simd
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          16.6ms ±  328us    16.0ms … 19.0ms         17 ( 6%)        ⚡ -  7.6% ±  0.4%
  peak_rss           2.15MB ± 63.5KB    2.10MB … 2.23MB          0 ( 0%)          +  0.3% ±  0.5%
  cpu_cycles         71.3M  ± 1.31M     70.2M  … 81.4M          17 ( 6%)        ⚡ -  8.0% ±  0.4%
  instructions       68.1M  ±  281      68.1M  … 68.1M           1 ( 0%)        ⚡ - 56.7% ±  0.0%
  cache_references   39.6M  ±  812K     31.9M  … 44.1M          22 ( 7%)        ⚡ -  5.4% ±  0.3%
  cache_misses        223K  ± 52.9K      152K  …  507K          64 (21%)        ⚡ - 11.1% ±  3.8%
  branch_misses      14.1K  ±  578      13.9K  … 24.0K           5 ( 2%)          +  0.2% ±  0.5%
```