Skip to content

Instantly share code, notes, and snippets.

@copyleftdev
Created April 12, 2026 02:36
Show Gist options
  • Select an option

  • Save copyleftdev/a7088215d904111c2b9d227a44a2561f to your computer and use it in GitHub Desktop.

Select an option

Save copyleftdev/a7088215d904111c2b9d227a44a2561f to your computer and use it in GitHub Desktop.
Palimpsest: ExecutionEnvelope (Sealed Context)
//! palimpsest-envelope: The execution envelope.
//!
//! Every fetch runs inside a sealed context capturing:
//! seed, timestamp, headers, DNS resolution snapshot, TLS fingerprint,
//! and browser config (if applicable).
//!
//! The envelope is immutable after construction. It is the critical
//! abstraction that makes both determinism (Law 1) and replay fidelity
//! (Law 5) possible.
pub mod browser;
pub mod dns;
pub mod tls;
use palimpsest_core::hash::ContentHash;
use palimpsest_core::time::CaptureInstant;
use palimpsest_core::types::CrawlSeed;
use serde::{Deserialize, Serialize};
use url::Url;
use crate::browser::BrowserConfig;
use crate::dns::DnsSnapshot;
use crate::tls::TlsFingerprint;
/// A sealed execution context for a single fetch operation.
///
/// Once constructed via [`EnvelopeBuilder`], an `ExecutionEnvelope` is immutable:
/// its fields are private and this file exposes only read accessors for them.
/// All fields are captured *before* the fetch begins and recorded alongside the
/// resulting artifacts. This is what makes deterministic replay possible:
/// given the same envelope, the fetch engine must produce identical artifacts.
///
/// Serialized as the first record in a WARC++ capture group.
///
/// Field declaration order is part of the hashing contract: the content hash is
/// computed over the `serde_json` serialization of this struct, which emits
/// fields in declaration order. Reordering, renaming, or adding fields changes
/// the hash of every envelope.
///
/// NOTE(review): the derived `Deserialize` does not appear to re-check
/// `content_hash` against the other fields — confirm whether readers verify it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExecutionEnvelope {
/// The crawl seed that governs all deterministic decisions.
seed: CrawlSeed,
/// Wall-clock + logical timestamp at envelope creation time.
timestamp: CaptureInstant,
/// The target URL for this fetch.
target_url: Url,
/// Ordered HTTP request headers to be sent, as `(name, value)` pairs.
/// Uses `Vec` (not `HashMap`) to preserve insertion order — Law 1.
request_headers: Vec<(String, String)>,
/// DNS resolution snapshot captured before connecting.
dns_snapshot: DnsSnapshot,
/// TLS connection fingerprint. `None` for plain HTTP.
tls_fingerprint: Option<TlsFingerprint>,
/// Browser configuration. `None` for raw HTTP fetches.
browser_config: Option<BrowserConfig>,
/// BLAKE3 hash of the canonical JSON serialization of this envelope.
///
/// It is computed while this field holds the placeholder
/// `ContentHash::of(b"")` (the hash of empty input, not an all-zero value),
/// and the placeholder is then replaced with the result.
content_hash: ContentHash,
}
impl ExecutionEnvelope {
    /// Returns a copy of the crawl seed recorded in this envelope.
    pub fn seed(&self) -> CrawlSeed {
        let Self { seed, .. } = self;
        *seed
    }

    /// Returns a copy of the capture timestamp recorded in this envelope.
    pub fn timestamp(&self) -> CaptureInstant {
        let Self { timestamp, .. } = self;
        *timestamp
    }

    /// Borrows the URL this envelope targets.
    pub fn target_url(&self) -> &Url {
        let Self { target_url, .. } = self;
        target_url
    }

    /// Borrows the request headers as `(name, value)` pairs, in stored order.
    pub fn request_headers(&self) -> &[(String, String)] {
        self.request_headers.as_slice()
    }

    /// Borrows the DNS snapshot recorded in this envelope.
    pub fn dns_snapshot(&self) -> &DnsSnapshot {
        let Self { dns_snapshot, .. } = self;
        dns_snapshot
    }

    /// Borrows the TLS fingerprint, or `None` when none was recorded.
    pub fn tls_fingerprint(&self) -> Option<&TlsFingerprint> {
        Option::as_ref(&self.tls_fingerprint)
    }

    /// Borrows the browser configuration, or `None` when none was recorded.
    pub fn browser_config(&self) -> Option<&BrowserConfig> {
        Option::as_ref(&self.browser_config)
    }

    /// Returns a copy of the content hash stored in this envelope.
    pub fn content_hash(&self) -> ContentHash {
        let Self { content_hash, .. } = self;
        *content_hash
    }
}
/// Reasons building an [`ExecutionEnvelope`] can fail.
///
/// Each variant names one required builder field that was never supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EnvelopeError {
    /// No crawl seed was provided.
    MissingSeed,
    /// No capture timestamp was provided.
    MissingTimestamp,
    /// No target URL was provided.
    MissingTargetUrl,
    /// No DNS snapshot was provided.
    MissingDnsSnapshot,
}

impl std::fmt::Display for EnvelopeError {
    /// Writes a fixed, human-readable message for the missing field.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every message is a static string, so pick it first and write once.
        let message = match self {
            Self::MissingSeed => "envelope requires a CrawlSeed",
            Self::MissingTimestamp => "envelope requires a CaptureInstant",
            Self::MissingTargetUrl => "envelope requires a target URL",
            Self::MissingDnsSnapshot => "envelope requires a DNS snapshot",
        };
        f.write_str(message)
    }
}

impl std::error::Error for EnvelopeError {}
/// Builder for constructing an [`ExecutionEnvelope`].
///
/// Required fields: `seed`, `timestamp`, `target_url`, `dns_snapshot`.
/// Optional fields: `tls_fingerprint`, `browser_config`.
/// Headers default to empty if not set.
///
/// The content hash is computed automatically at build time from the
/// canonical JSON serialization.
///
/// Every setter consumes the builder and returns it, so a call whose result is
/// dropped silently loses the configuration; `#[must_use]` turns that mistake
/// into a compiler warning.
#[must_use = "builder methods return the updated builder; call `.build()` to produce an envelope"]
#[derive(Debug, Default)]
pub struct EnvelopeBuilder {
    /// Crawl seed; required by `build`.
    seed: Option<CrawlSeed>,
    /// Capture timestamp; required by `build`.
    timestamp: Option<CaptureInstant>,
    /// Target URL; required by `build`.
    target_url: Option<Url>,
    /// Request headers as `(name, value)` pairs in insertion order; empty by default.
    request_headers: Vec<(String, String)>,
    /// DNS snapshot; required by `build`.
    dns_snapshot: Option<DnsSnapshot>,
    /// Optional TLS fingerprint; stays `None` unless set.
    tls_fingerprint: Option<TlsFingerprint>,
    /// Optional browser configuration; stays `None` unless set.
    browser_config: Option<BrowserConfig>,
}
impl EnvelopeBuilder {
    /// Creates a builder with no fields set and an empty header list.
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Sets the crawl seed (required).
    pub fn seed(self, seed: CrawlSeed) -> Self {
        Self {
            seed: Some(seed),
            ..self
        }
    }

    /// Sets the capture timestamp (required).
    pub fn timestamp(self, timestamp: CaptureInstant) -> Self {
        Self {
            timestamp: Some(timestamp),
            ..self
        }
    }

    /// Sets the target URL (required).
    pub fn target_url(self, url: Url) -> Self {
        Self {
            target_url: Some(url),
            ..self
        }
    }

    /// Appends one request header after any headers already present.
    pub fn header(mut self, name: String, value: String) -> Self {
        let entry = (name, value);
        self.request_headers.push(entry);
        self
    }

    /// Replaces the whole header list, discarding headers added earlier.
    pub fn headers(self, headers: Vec<(String, String)>) -> Self {
        Self {
            request_headers: headers,
            ..self
        }
    }

    /// Sets the DNS snapshot (required).
    pub fn dns_snapshot(self, dns: DnsSnapshot) -> Self {
        Self {
            dns_snapshot: Some(dns),
            ..self
        }
    }

    /// Sets the TLS fingerprint (optional).
    pub fn tls_fingerprint(self, tls: TlsFingerprint) -> Self {
        Self {
            tls_fingerprint: Some(tls),
            ..self
        }
    }

    /// Sets the browser configuration (optional).
    pub fn browser_config(self, config: BrowserConfig) -> Self {
        Self {
            browser_config: Some(config),
            ..self
        }
    }

    /// Consumes the builder and returns a sealed [`ExecutionEnvelope`].
    ///
    /// The envelope is first assembled with the placeholder hash
    /// `ContentHash::of(b"")`; the real content hash is then computed from it
    /// and stored in its place.
    ///
    /// # Errors
    ///
    /// Returns the [`EnvelopeError`] variant for the first missing required
    /// field, checked in this order: seed, timestamp, target URL, DNS snapshot.
    pub fn build(self) -> Result<ExecutionEnvelope, EnvelopeError> {
        let Self {
            seed,
            timestamp,
            target_url,
            request_headers,
            dns_snapshot,
            tls_fingerprint,
            browser_config,
        } = self;

        // Field initializers are evaluated top to bottom, so the `?` checks
        // below fire in the documented order.
        let unsealed = ExecutionEnvelope {
            seed: seed.ok_or(EnvelopeError::MissingSeed)?,
            timestamp: timestamp.ok_or(EnvelopeError::MissingTimestamp)?,
            target_url: target_url.ok_or(EnvelopeError::MissingTargetUrl)?,
            request_headers,
            dns_snapshot: dns_snapshot.ok_or(EnvelopeError::MissingDnsSnapshot)?,
            tls_fingerprint,
            browser_config,
            content_hash: ContentHash::of(b""),
        };

        // Swap the placeholder for the hash derived from the unsealed envelope.
        let content_hash = compute_envelope_hash(&unsealed);
        Ok(ExecutionEnvelope {
            content_hash,
            ..unsealed
        })
    }
}
/// Compute the canonical content hash of an envelope.
///
/// The hash is computed over the JSON serialization with `content_hash`
/// set to the zero-input hash. This makes the hash deterministic and
/// self-referentially consistent.
fn compute_envelope_hash(envelope: &ExecutionEnvelope) -> ContentHash {
// Create a temporary copy with the placeholder hash for serialization.
let mut hashable = envelope.clone();
hashable.content_hash = ContentHash::of(b"");
// Canonical JSON: compact, deterministic key ordering via serde.
// serde_json serializes struct fields in declaration order, which is stable.
let canonical = serde_json::to_vec(&hashable).unwrap_or_default(); // empty vec if serialization fails — produces a known hash
ContentHash::of(&canonical)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment