/// Number of times each benchmark variant processes a fresh copy of the table.
const N: usize = 10000;

/// Number of `u16` entries in the benchmark table.
const TABLE_LEN: usize = 1 << 16;

/// Benchmark entry point.
///
/// Expects exactly one mode argument, `scalar` or `simd`, and runs the
/// corresponding slide-hash implementation [`N`] times over a copy of the
/// same input table. Any other argument (or none) prints an error to stderr
/// and exits with status 1.
fn main() {
    let mode = std::env::args().nth(1); // index 0 is the program name

    // just get a bunch of arbitrary values cheaply
    let table = arbitrary_table(TABLE_LEN);

    // Hide the constant from the optimizer so the subtraction cannot be
    // specialised for this particular window size.
    let wsize: u16 = std::hint::black_box(42);

    match mode.as_deref() {
        Some("scalar") => {
            for _ in 0..N {
                let mut table = table.to_vec();
                scalar(&mut table, wsize);
                // Keep the result observable so the work is not optimized away.
                std::hint::black_box(&table);
            }
        }
        Some("simd") => {
            if let Err(message) = run_simd(&table, wsize) {
                eprintln!("{}", message);
                std::process::exit(1);
            }
        }
        _ => {
            eprintln!("Unexpected argument. Only 'scalar' or 'simd' are allowed.");
            std::process::exit(1);
        }
    }
}

/// Produces `len` deterministic pseudo-random `u16` values.
///
/// Uses a 32-bit xorshift generator with a fixed non-zero seed, so every run
/// benchmarks the same data without depending on any file on disk.
fn arbitrary_table(len: usize) -> Vec<u16> {
    let mut state: u32 = 0x9E37_79B9;
    (0..len)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            // After shifting right by 16 only the upper half remains, so the
            // cast is lossless.
            (state >> 16) as u16
        })
        .collect()
}

/// Runs the AVX2 variant [`N`] times, each time on a fresh copy of `table`.
///
/// # Errors
///
/// Returns a message when the running CPU does not support AVX2.
#[cfg(target_arch = "x86_64")]
fn run_simd(table: &[u16], wsize: u16) -> Result<(), &'static str> {
    if !std::is_x86_feature_detected!("avx2") {
        return Err("The 'simd' benchmark requires a CPU with AVX2 support.");
    }

    for _ in 0..N {
        let mut table = table.to_vec();
        // SAFETY: AVX2 availability was verified at runtime just above.
        unsafe { simd(&mut table, wsize) }
        // Keep the result observable so the work is not optimized away.
        std::hint::black_box(&table);
    }

    Ok(())
}

/// Fallback for targets without the AVX2 implementation.
///
/// # Errors
///
/// Always returns a message, because `simd` only exists on `x86_64`.
#[cfg(not(target_arch = "x86_64"))]
fn run_simd(_table: &[u16], _wsize: u16) -> Result<(), &'static str> {
    Err("The 'simd' benchmark requires an x86_64 target.")
}

/// Subtracts `wsize` from every entry of `table`, saturating at zero.
pub fn scalar(table: &mut [u16], wsize: u16) {
    for m in table.iter_mut() {
        *m = m.saturating_sub(wsize);
    }
}

/// Subtracts `wsize` from every entry of `table`, saturating at zero,
/// walking the table in fixed-size chunks of `CHUNK` elements.
///
/// Entries left over when `table.len()` is not a multiple of `CHUNK` are
/// processed one by one, so the result always matches [`scalar`].
///
/// `#[inline(always)]` is deliberate: the body has to be inlined into a
/// `#[target_feature]` caller to be compiled with that caller's features.
#[inline(always)]
fn generic_slide_hash_chain<const CHUNK: usize>(table: &mut [u16], wsize: u16) {
    let mut chunks = table.chunks_exact_mut(CHUNK);

    for chunk in chunks.by_ref() {
        for m in chunk.iter_mut() {
            *m = m.saturating_sub(wsize);
        }
    }

    // Tail shorter than one full chunk.
    for m in chunks.into_remainder() {
        *m = m.saturating_sub(wsize);
    }
}

/// AVX2 variant of [`scalar`].
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn simd(table: &mut [u16], wsize: u16) {
    // 64 means that 4 256-bit values can be processed per iteration.
    // That appears to be the optimal amount for avx2.
    generic_slide_hash_chain::<64>(table, wsize);
}