Skip to content

Instantly share code, notes, and snippets.

@copyleftdev
Created April 12, 2026 02:36
Show Gist options
  • Select an option

  • Save copyleftdev/b913cf69e079d3a6d5212e0a39dcea2b to your computer and use it in GitHub Desktop.

Select an option

Save copyleftdev/b913cf69e079d3a6d5212e0a39dcea2b to your computer and use it in GitHub Desktop.
Palimpsest: Seeded PRNG (CrawlSeed)
//! Core domain types shared across all crates.
use rand_chacha::ChaCha8Rng;
use rand_chacha::rand_core::SeedableRng;
use serde::{Deserialize, Serialize};
use url::Url;
use crate::hash::ContentHash;
use crate::time::CaptureInstant;
/// Seed for all deterministic operations in a crawl.
///
/// Law 1 (Determinism): All randomness flows from this seed through a seeded
/// PRNG. No `rand::thread_rng()`, no `OsRng`, no entropy sources in core paths.
/// Call [`CrawlSeed::rng()`] to get a deterministic `ChaCha8Rng`.
///
/// The type is a thin, `Copy` wrapper around a single `u64`, and it derives
/// `Serialize`/`Deserialize` so a seed can be written out and read back.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CrawlSeed {
/// Raw 64-bit seed value. [`CrawlSeed::rng()`] hands it to
/// `ChaCha8Rng::seed_from_u64`, and [`CrawlSeed::derive()`] hashes it
/// together with an index to produce child seeds.
pub value: u64,
}
impl CrawlSeed {
/// Create a new seed from a `u64` value.
pub fn new(value: u64) -> Self {
Self { value }
}
/// Produce a deterministic PRNG from this seed.
///
/// Same seed value always yields the same RNG sequence.
/// This is the **only** sanctioned way to obtain randomness in the kernel.
pub fn rng(&self) -> ChaCha8Rng {
ChaCha8Rng::seed_from_u64(self.value)
}
/// Derive a child seed for a sub-operation (e.g., per-host frontier shard).
///
/// Deterministic: same parent seed + same index = same child seed.
pub fn derive(&self, index: u64) -> Self {
// Mix the parent seed with the index using BLAKE3 to avoid correlation.
let mut input = [0u8; 16];
input[..8].copy_from_slice(&self.value.to_le_bytes());
input[8..].copy_from_slice(&index.to_le_bytes());
let hash = blake3::hash(&input);
let b = hash.as_bytes();
Self {
value: u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
}
}
}
/// A target URL to be crawled, with its depth in the crawl graph and optional
/// parent artifact that discovered it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CrawlTarget {
/// The URL to fetch.
pub url: Url,
/// Depth of this target in the crawl graph.
// NOTE(review): presumably 0 denotes an initial/seed target — confirm with the frontier code.
pub depth: u32,
/// Content hash of the artifact that discovered this URL, or `None` when
/// there is no such parent.
pub parent: Option<ContentHash>,
}
/// Unique identifier for a captured artifact, binding content hash to its
/// temporal and crawl context.
///
/// Two ids compare equal only when all three components are equal (derived
/// `PartialEq`/`Eq`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ArtifactId {
/// Hash of the artifact's content.
pub content_hash: ContentHash,
/// Temporal component of the id.
// NOTE(review): presumably the moment of capture; `CaptureInstant` is defined in `crate::time` — confirm its exact semantics there.
pub captured_at: CaptureInstant,
/// Identifier of the crawl execution context this artifact belongs to.
pub crawl_context: CrawlContextId,
}
/// Opaque identifier for a crawl execution context.
///
/// Newtype over `u64`. The wrapped value is a public field, so callers can
/// construct and read ids directly; this file assigns no meaning to the
/// number itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CrawlContextId(pub u64);
#[cfg(test)]
mod tests {
    use super::*;
    use rand_chacha::rand_core::RngCore;

    /// Collect the first `count` `u64` outputs of the RNG produced by `seed`.
    fn draw(seed: CrawlSeed, count: usize) -> Vec<u64> {
        let mut rng = seed.rng();
        std::iter::repeat_with(|| rng.next_u64())
            .take(count)
            .collect()
    }

    /// Target for `https://example.com` at depth 0 with no parent.
    fn example_target() -> CrawlTarget {
        CrawlTarget {
            url: Url::parse("https://example.com").unwrap(),
            depth: 0,
            parent: None,
        }
    }

    // --- CrawlSeed::rng ---

    #[test]
    fn test_seed_rng_same_seed_produces_identical_sequence() {
        let seed = CrawlSeed::new(42);
        let first = draw(seed, 1000);
        let second = draw(seed, 1000);
        assert_eq!(
            first, second,
            "same seed must produce identical RNG sequence"
        );
    }

    #[test]
    fn test_seed_rng_different_seeds_produce_different_sequences() {
        let from_one = draw(CrawlSeed::new(1), 100);
        let from_two = draw(CrawlSeed::new(2), 100);
        assert_ne!(
            from_one, from_two,
            "different seeds must produce different sequences"
        );
    }

    // --- CrawlSeed::derive ---

    #[test]
    fn test_seed_derive_is_deterministic() {
        let parent = CrawlSeed::new(42);
        assert_eq!(
            parent.derive(0),
            parent.derive(0),
            "same parent + same index = same child seed"
        );
    }

    #[test]
    fn test_seed_derive_different_indices_differ() {
        let parent = CrawlSeed::new(42);
        assert_ne!(
            parent.derive(0),
            parent.derive(1),
            "different indices must yield different child seeds"
        );
    }

    #[test]
    fn test_seed_derive_different_parents_differ() {
        let from_one = CrawlSeed::new(1).derive(0);
        let from_two = CrawlSeed::new(2).derive(0);
        assert_ne!(
            from_one, from_two,
            "different parent seeds must yield different children"
        );
    }

    #[test]
    fn test_seed_derive_child_rng_is_independent() {
        let parent = CrawlSeed::new(99);
        let child = parent.derive(7);
        let parent_stream = draw(parent, 100);
        let child_stream = draw(child, 100);
        assert_ne!(
            parent_stream, child_stream,
            "child RNG must differ from parent RNG"
        );
    }

    // --- Trait behaviour of the domain types ---

    #[test]
    fn test_seed_is_copy() {
        let original = CrawlSeed::new(42);
        // Binding by value must copy; `original` stays usable below.
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_seed_serialization_roundtrip() {
        let original = CrawlSeed::new(42);
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: CrawlSeed = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    #[test]
    fn test_crawl_context_id_is_copy_and_eq() {
        let original = CrawlContextId(1);
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_crawl_target_equality() {
        // Two independently built but identical targets must compare equal.
        assert_eq!(example_target(), example_target());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment