Skip to content

Instantly share code, notes, and snippets.

@cargopete
Created April 8, 2025 07:18
Show Gist options
  • Select an option

  • Save cargopete/8c82e051e53867639b4b2940dc65b9e0 to your computer and use it in GitHub Desktop.

Select an option

Save cargopete/8c82e051e53867639b4b2940dc65b9e0 to your computer and use it in GitHub Desktop.
Solution
use rand::Rng;
use rand::seq::SliceRandom;
use std::collections::{HashSet, BTreeSet};
use std::time::Instant;
use rand::distributions::{Distribution, Uniform};
/// optimized for different scenarios including huge (GB) data ranges
fn generate_unique_random_numbers(begin: i64, end: i64, count: usize) -> Vec<i64> {
let range_size = (end - begin + 1) as usize;
if count > range_size {
panic!("Cannot generate {} unique numbers in range [{}, {}]", count, begin, end);
}
let mut rng = rand::thread_rng();
// for small ranges, generate all numbers and shuffle
if range_size <= 10_000 && range_size <= 10 * count {
let mut all_numbers: Vec<i64> = (begin..=end).collect();
all_numbers.shuffle(&mut rng);
return all_numbers.into_iter().take(count).collect();
}
// for medium-sized tasks, use HashSet
else if count < 10_000_000 && range_size < i64::MAX as usize / 10 {
let mut unique_numbers = HashSet::with_capacity(count);
let range = Uniform::new_inclusive(begin, end);
while unique_numbers.len() < count {
unique_numbers.insert(range.sample(&mut rng));
}
return unique_numbers.into_iter().collect();
}
// for extremely large ranges with small count, use reservoir sampling
else if count < range_size / 1000 {
let mut result = Vec::with_capacity(count);
let range = Uniform::new_inclusive(begin, end);
// Initial population of the reservoir
let mut seen = BTreeSet::new();
while result.len() < count {
let num = range.sample(&mut rng);
if seen.insert(num) {
result.push(num);
}
}
return result;
}
// for large count from huge range, use Floyd's algorithm (selection sampling), this is what I couldn't remember in the interview
else {
let mut selected = BTreeSet::new();
let offset = end - range_size as i64 + 1;
for j in (range_size - count)..range_size {
let t = rng.gen_range(0..=j);
if !selected.contains(&t) {
selected.insert(t);
} else {
selected.insert(j);
}
}
return selected.into_iter().map(|x| x + begin).collect();
}
}
fn test_scenarios() {
{
// small range
let begin = 1;
let end = 10;
let count = 3;
let random_numbers = generate_unique_random_numbers(begin, end, count);
println!("Small range example: {:?}", random_numbers);
}
// medium range
{
let begin = 1;
let end = 1_000_000;
let count = 5;
let start = Instant::now();
let random_numbers = generate_unique_random_numbers(begin, end, count);
let duration = start.elapsed();
println!("Medium range (1 to 1M, 5 numbers): {:?}", random_numbers);
println!("Time taken: {:?}", duration);
}
// large range
/*
{
let begin = 1;
let end = 1_000_000_000;
let count = 10;
let start = Instant::now();
let random_numbers = generate_unique_random_numbers(begin, end, count);
let duration = start.elapsed();
println!("Large range (1 to 1B, 10 numbers): {:?}", random_numbers);
println!("Time taken: {:?}", duration);
}
*/
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment