Created
April 12, 2026 02:36
-
-
Save copyleftdev/a7088215d904111c2b9d227a44a2561f to your computer and use it in GitHub Desktop.
Palimpsest: ExecutionEnvelope (Sealed Context)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //! palimpsest-envelope: The execution envelope. | |
| //! | |
| //! Every fetch runs inside a sealed context capturing: | |
| //! seed, timestamp, headers, DNS resolution snapshot, TLS fingerprint, | |
| //! and browser config (if applicable). | |
| //! | |
| //! The envelope is immutable after construction. It is the critical | |
| //! abstraction that makes both determinism (Law 1) and replay fidelity | |
| //! (Law 5) possible. | |
| pub mod browser; | |
| pub mod dns; | |
| pub mod tls; | |
| use palimpsest_core::hash::ContentHash; | |
| use palimpsest_core::time::CaptureInstant; | |
| use palimpsest_core::types::CrawlSeed; | |
| use serde::{Deserialize, Serialize}; | |
| use url::Url; | |
| use crate::browser::BrowserConfig; | |
| use crate::dns::DnsSnapshot; | |
| use crate::tls::TlsFingerprint; | |
| /// A sealed execution context for a single fetch operation. | |
| /// | |
| /// Once constructed via [`EnvelopeBuilder`], an `ExecutionEnvelope` is immutable. | |
| /// All fields are captured *before* the fetch begins and recorded alongside the | |
| /// resulting artifacts. This is what makes deterministic replay possible: | |
| /// given the same envelope, the fetch engine must produce identical artifacts. | |
| /// | |
| /// Serialized as the first record in a WARC++ capture group. | |
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | |
| pub struct ExecutionEnvelope { | |
| /// The crawl seed that governs all deterministic decisions. | |
| seed: CrawlSeed, | |
| /// Wall-clock + logical timestamp at envelope creation time. | |
| timestamp: CaptureInstant, | |
| /// The target URL for this fetch. | |
| target_url: Url, | |
| /// Ordered HTTP request headers to be sent. | |
| /// Uses `Vec` (not `HashMap`) to preserve insertion order — Law 1. | |
| request_headers: Vec<(String, String)>, | |
| /// DNS resolution snapshot captured before connecting. | |
| dns_snapshot: DnsSnapshot, | |
| /// TLS connection fingerprint. `None` for plain HTTP. | |
| tls_fingerprint: Option<TlsFingerprint>, | |
| /// Browser configuration. `None` for raw HTTP fetches. | |
| browser_config: Option<BrowserConfig>, | |
| /// BLAKE3 hash of the canonical JSON serialization of this envelope | |
| /// (computed with this field set to a zero hash, then replaced). | |
| content_hash: ContentHash, | |
| } | |
| impl ExecutionEnvelope { | |
| pub fn seed(&self) -> CrawlSeed { | |
| self.seed | |
| } | |
| pub fn timestamp(&self) -> CaptureInstant { | |
| self.timestamp | |
| } | |
| pub fn target_url(&self) -> &Url { | |
| &self.target_url | |
| } | |
| pub fn request_headers(&self) -> &[(String, String)] { | |
| &self.request_headers | |
| } | |
| pub fn dns_snapshot(&self) -> &DnsSnapshot { | |
| &self.dns_snapshot | |
| } | |
| pub fn tls_fingerprint(&self) -> Option<&TlsFingerprint> { | |
| self.tls_fingerprint.as_ref() | |
| } | |
| pub fn browser_config(&self) -> Option<&BrowserConfig> { | |
| self.browser_config.as_ref() | |
| } | |
| pub fn content_hash(&self) -> ContentHash { | |
| self.content_hash | |
| } | |
| } | |
/// Reasons an envelope cannot be built.
///
/// Each variant names one required builder field that was never supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EnvelopeError {
    /// No crawl seed was supplied.
    MissingSeed,
    /// No capture timestamp was supplied.
    MissingTimestamp,
    /// No target URL was supplied.
    MissingTargetUrl,
    /// No DNS snapshot was supplied.
    MissingDnsSnapshot,
}

impl std::fmt::Display for EnvelopeError {
    /// Writes a fixed, human-readable sentence for the missing field.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the message first, then emit it once.
        let message = match self {
            Self::MissingSeed => "envelope requires a CrawlSeed",
            Self::MissingTimestamp => "envelope requires a CaptureInstant",
            Self::MissingTargetUrl => "envelope requires a target URL",
            Self::MissingDnsSnapshot => "envelope requires a DNS snapshot",
        };
        f.write_str(message)
    }
}

// No source error to chain; the default trait methods are sufficient.
impl std::error::Error for EnvelopeError {}
| /// Builder for constructing an [`ExecutionEnvelope`]. | |
| /// | |
| /// Required fields: `seed`, `timestamp`, `target_url`, `dns_snapshot`. | |
| /// Optional fields: `tls_fingerprint`, `browser_config`. | |
| /// Headers default to empty if not set. | |
| /// | |
| /// The content hash is computed automatically at build time from the | |
| /// canonical JSON serialization. | |
| #[derive(Debug, Default)] | |
| pub struct EnvelopeBuilder { | |
| seed: Option<CrawlSeed>, | |
| timestamp: Option<CaptureInstant>, | |
| target_url: Option<Url>, | |
| request_headers: Vec<(String, String)>, | |
| dns_snapshot: Option<DnsSnapshot>, | |
| tls_fingerprint: Option<TlsFingerprint>, | |
| browser_config: Option<BrowserConfig>, | |
| } | |
| impl EnvelopeBuilder { | |
| pub fn new() -> Self { | |
| Self::default() | |
| } | |
| pub fn seed(mut self, seed: CrawlSeed) -> Self { | |
| self.seed = Some(seed); | |
| self | |
| } | |
| pub fn timestamp(mut self, timestamp: CaptureInstant) -> Self { | |
| self.timestamp = Some(timestamp); | |
| self | |
| } | |
| pub fn target_url(mut self, url: Url) -> Self { | |
| self.target_url = Some(url); | |
| self | |
| } | |
| pub fn header(mut self, name: String, value: String) -> Self { | |
| self.request_headers.push((name, value)); | |
| self | |
| } | |
| pub fn headers(mut self, headers: Vec<(String, String)>) -> Self { | |
| self.request_headers = headers; | |
| self | |
| } | |
| pub fn dns_snapshot(mut self, dns: DnsSnapshot) -> Self { | |
| self.dns_snapshot = Some(dns); | |
| self | |
| } | |
| pub fn tls_fingerprint(mut self, tls: TlsFingerprint) -> Self { | |
| self.tls_fingerprint = Some(tls); | |
| self | |
| } | |
| pub fn browser_config(mut self, config: BrowserConfig) -> Self { | |
| self.browser_config = Some(config); | |
| self | |
| } | |
| /// Consume the builder and produce a sealed [`ExecutionEnvelope`]. | |
| /// | |
| /// Computes the content hash from the canonical JSON serialization. | |
| /// Returns an error if any required field is missing. | |
| pub fn build(self) -> Result<ExecutionEnvelope, EnvelopeError> { | |
| let seed = self.seed.ok_or(EnvelopeError::MissingSeed)?; | |
| let timestamp = self.timestamp.ok_or(EnvelopeError::MissingTimestamp)?; | |
| let target_url = self.target_url.ok_or(EnvelopeError::MissingTargetUrl)?; | |
| let dns_snapshot = self.dns_snapshot.ok_or(EnvelopeError::MissingDnsSnapshot)?; | |
| // Build with a zero hash first, serialize, then compute the real hash. | |
| let mut envelope = ExecutionEnvelope { | |
| seed, | |
| timestamp, | |
| target_url, | |
| request_headers: self.request_headers, | |
| dns_snapshot, | |
| tls_fingerprint: self.tls_fingerprint, | |
| browser_config: self.browser_config, | |
| content_hash: ContentHash::of(b""), | |
| }; | |
| envelope.content_hash = compute_envelope_hash(&envelope); | |
| Ok(envelope) | |
| } | |
| } | |
| /// Compute the canonical content hash of an envelope. | |
| /// | |
| /// The hash is computed over the JSON serialization with `content_hash` | |
| /// set to the zero-input hash. This makes the hash deterministic and | |
| /// self-referentially consistent. | |
| fn compute_envelope_hash(envelope: &ExecutionEnvelope) -> ContentHash { | |
| // Create a temporary copy with the placeholder hash for serialization. | |
| let mut hashable = envelope.clone(); | |
| hashable.content_hash = ContentHash::of(b""); | |
| // Canonical JSON: compact, deterministic key ordering via serde. | |
| // serde_json serializes struct fields in declaration order, which is stable. | |
| let canonical = serde_json::to_vec(&hashable).unwrap_or_default(); // empty vec if serialization fails — produces a known hash | |
| ContentHash::of(&canonical) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment