Created
April 8, 2025 07:18
-
-
Save cargopete/8c82e051e53867639b4b2940dc65b9e0 to your computer and use it in GitHub Desktop.
Solution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use rand::Rng; | |
| use rand::seq::SliceRandom; | |
| use std::collections::{HashSet, BTreeSet}; | |
| use std::time::Instant; | |
| use rand::distributions::{Distribution, Uniform}; | |
| /// optimized for different scenarios including huge (GB) data ranges | |
| fn generate_unique_random_numbers(begin: i64, end: i64, count: usize) -> Vec<i64> { | |
| let range_size = (end - begin + 1) as usize; | |
| if count > range_size { | |
| panic!("Cannot generate {} unique numbers in range [{}, {}]", count, begin, end); | |
| } | |
| let mut rng = rand::thread_rng(); | |
| // for small ranges, generate all numbers and shuffle | |
| if range_size <= 10_000 && range_size <= 10 * count { | |
| let mut all_numbers: Vec<i64> = (begin..=end).collect(); | |
| all_numbers.shuffle(&mut rng); | |
| return all_numbers.into_iter().take(count).collect(); | |
| } | |
| // for medium-sized tasks, use HashSet | |
| else if count < 10_000_000 && range_size < i64::MAX as usize / 10 { | |
| let mut unique_numbers = HashSet::with_capacity(count); | |
| let range = Uniform::new_inclusive(begin, end); | |
| while unique_numbers.len() < count { | |
| unique_numbers.insert(range.sample(&mut rng)); | |
| } | |
| return unique_numbers.into_iter().collect(); | |
| } | |
| // for extremely large ranges with small count, use reservoir sampling | |
| else if count < range_size / 1000 { | |
| let mut result = Vec::with_capacity(count); | |
| let range = Uniform::new_inclusive(begin, end); | |
| // Initial population of the reservoir | |
| let mut seen = BTreeSet::new(); | |
| while result.len() < count { | |
| let num = range.sample(&mut rng); | |
| if seen.insert(num) { | |
| result.push(num); | |
| } | |
| } | |
| return result; | |
| } | |
| // for large count from huge range, use Floyd's algorithm (selection sampling), this is what I couldn't remember in the interview | |
| else { | |
| let mut selected = BTreeSet::new(); | |
| let offset = end - range_size as i64 + 1; | |
| for j in (range_size - count)..range_size { | |
| let t = rng.gen_range(0..=j); | |
| if !selected.contains(&t) { | |
| selected.insert(t); | |
| } else { | |
| selected.insert(j); | |
| } | |
| } | |
| return selected.into_iter().map(|x| x + begin).collect(); | |
| } | |
| } | |
| fn test_scenarios() { | |
| { | |
| // small range | |
| let begin = 1; | |
| let end = 10; | |
| let count = 3; | |
| let random_numbers = generate_unique_random_numbers(begin, end, count); | |
| println!("Small range example: {:?}", random_numbers); | |
| } | |
| // medium range | |
| { | |
| let begin = 1; | |
| let end = 1_000_000; | |
| let count = 5; | |
| let start = Instant::now(); | |
| let random_numbers = generate_unique_random_numbers(begin, end, count); | |
| let duration = start.elapsed(); | |
| println!("Medium range (1 to 1M, 5 numbers): {:?}", random_numbers); | |
| println!("Time taken: {:?}", duration); | |
| } | |
| // large range | |
| /* | |
| { | |
| let begin = 1; | |
| let end = 1_000_000_000; | |
| let count = 10; | |
| let start = Instant::now(); | |
| let random_numbers = generate_unique_random_numbers(begin, end, count); | |
| let duration = start.elapsed(); | |
| println!("Large range (1 to 1B, 10 numbers): {:?}", random_numbers); | |
| println!("Time taken: {:?}", duration); | |
| } | |
| */ | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment