opsec-ee · March 20, 2026 22:38 · opsec-ee · Mar 20, 2026
diff --git a/lzw-ee-strings.c b/lzw-ee-strings.c
 /*
 * lzw.c
 *  Euman = ∫(optimization/time) △ performance
 *
 * @file     lzw.c                                      @version  2.1.0
 * @author   H. Overman (ee)
 * @created  2026-03-20
 * @modified 2026-03-20
 * @by       H. Overman (ee)
 * @brief    LZW compression / decompression. C23
 *           Direct-index dict: dict[code].next[byte] = next_code.
 *           O(1) lookup. Zero collision possible.
 *           Sentinel EE_LZW_EMPTY = 0xFFFF (exceeds all valid codes).
 *           Self-tests at startup. Gate-X at structural cause.
 *
 * Build (release): gcc -O3 -march=native -flto -funroll-loops -DNDEBUG
 *                      -Wall -Wextra -Wpedantic lzw.c -o lzw
 * Build (ASan):    gcc -fsanitize=address,undefined -O1 -g
 *                      -Wall -Wextra -Wpedantic lzw.c -o lzw
 *
 * Usage:
 *   ./lzw test    -- run self-tests and test suite
 *   ./lzw         -- interactive (==comp / ==decomp)
 *
 *
 * =======================================================================
 * IFP INVARIANTS (causal order)
 * =======================================================================
 *
 * I1: dict(code, byte)     -> next_code
 *       Direct array: ee_lzw_entry_t.next[byte]. O(1). Zero collision.
 *       Sentinel: EE_LZW_EMPTY = 0xFFFF. Never a valid code since
 *       EE_LZW_DICT_MAX = 4096 < 0xFFFF.
 *       Spot-check: 'A'=0x41, next['B'=0x42] = 256 after first "AB"
 *       added. dict[0x41].next[0x42] == 256. Verified in self-test.
 *
 * I2: compress(input, len) -> {codes[], code_count}
 *       Walk input. I1 lookup each byte.
 *       Hit: extend current code. Miss: emit, add entry, reset.
 *       Emit final code after loop.
 *
 * I3: decompress(codes[], count) -> {output[], out_len}
 *       Rebuild dict incrementally. Invert I2 exactly.
 *       Special case: code == dict_size (string ends with own first char).
 *
 * I4: round-trip invariant
 *       decompress(compress(s)) == s for all valid inputs.
 *       Verified by self-test checks 7-8.
 *
 * =======================================================================
 * DICT STRUCTURE
 * =======================================================================
 *
 * Direct-index: next[byte] = next_code or EE_LZW_EMPTY.
 * Key = byte. Value = next_code. No hash. No probe. No collision.
 * sizeof(ee_lzw_entry_t) = 256 * 2 = 512 bytes per entry.
 * Total dict = 4096 * 512 = 2 097 152 bytes = 2 MB.
 *
 * Derivation (python3):
 *   entries = 4096; entry_sz = 256 * 2; total = entries * entry_sz
 *   print(total)  # -> 2097152
 * Spot-check: 4096 * 512 = 2 097 152. Exact.
 *
 * =======================================================================
 * SENTINEL DERIVATION
 * =======================================================================
 *
 * EE_LZW_EMPTY = 0xFFFF = 65535.
 * EE_LZW_DICT_MAX = 4096. Valid codes: [0, 4095].
 * 65535 > 4095. Never a valid code. Unambiguous empty marker.
 * 0xFF fill (memset) sets every uint16_t slot to 0xFFFF. One call.
 * Using 0 as sentinel (original) was wrong: code 0 = NUL byte,
 * a valid single-character literal. 0xFFFF is structurally safe.
 *
 * =======================================================================
 * ACKNOWLEDGMENTS
 * =======================================================================
 *
 * H. Overman (ee):
 *   ppo-2 C23 systems programming standard. 4-state gate Z/X/0/1.
 *   FRONT/LEAD/REAR. IFP. ExCLisp. Direct-index dict.
 *   EE_LZW_EMPTY sentinel design. Linux kernel style.
 *
 * Terry Welch (IEEE Computer, June 1984):
 *   "A Technique for High-Performance Data Compression."
 *   LZW algorithm. Dictionary-based compression.
 *
 * Abraham Lempel / Jacob Ziv (1977, 1978):
 *   LZ77 / LZ78 -- precursors to LZW.
 *
 * ==================================================================
 */

 #define _POSIX_C_SOURCE 200809L

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
 #include <time.h>

 /*
 * ==================================================================
 * LOGOS / CONSTANTS
 * ==================================================================
 */

 /*
 * EE_LZW_DICT_MAX = 4096 = 2^12.
 * Standard LZW 12-bit code space. Codes [0,255]: literals.
 * Codes [256, 4095]: runtime extensions. 3840 extension slots.
 */
 #define EE_LZW_DICT_MAX		4096u
 #define EE_LZW_ALPHA_SZ		256u	/* byte alphabet 2^8               */
 #define EE_LZW_CODES_MAX	10000u	/* max codes per compress pass     */
 #define EE_LZW_OUTPUT_MAX	20000u	/* max bytes per decompress pass   */
 #define EE_LZW_STR_MAX		256u	/* max length of one dict string   */

 /*
 * EE_LZW_EMPTY = 0xFFFF = 65535.
 * Derivation: EE_LZW_DICT_MAX = 4096. Valid codes [0, 4095].
 * 0xFFFF = 65535 > 4095. Never a valid code. Safe sentinel.
 * memset(dict, 0xFF, ...) initialises all slots to 0xFFFF in one call.
 */
 #define EE_LZW_EMPTY		0xFFFFu

 static_assert(EE_LZW_DICT_MAX  == 4096u,
              "dict max must be 4096");
 static_assert(EE_LZW_ALPHA_SZ  == 256u,
              "alpha must be 256");
 static_assert(EE_LZW_EMPTY     > EE_LZW_DICT_MAX - 1u,
              "sentinel must exceed all valid codes");
 static_assert(EE_LZW_DICT_MAX * EE_LZW_ALPHA_SZ * sizeof(uint16_t) == 2097152u,
              "dict total size must be 2097152 bytes");

 /*
 * ==================================================================
 * LOGOS / 4-STATE GATE CONSTANTS
 * ==================================================================
 */

 #define EE_GATE_Z	0x00u	/* not applicable: empty input              */
 #define EE_GATE_X	0x01u	/* error: overflow, invalid code, NULL ptr  */
 #define EE_GATE_0	0x02u	/* deny: invariant violation                */
 #define EE_GATE_1	0x03u	/* allow: success                           */

 static_assert(EE_GATE_Z != EE_GATE_X &&
 EE_GATE_X != EE_GATE_0 &&
 EE_GATE_0 != EE_GATE_1,
 "gate constants must be distinct");

 /*
 * ==================================================================
 * LOGOS / REASON CODES
 * ==================================================================
 */

 #define EE_LZW_OK		0x0000u
 #define EE_LZW_R_EMPTY		0x0001u	/* gate-Z: empty input              */
 #define EE_LZW_R_NULL		0x0002u	/* gate-X: required ptr NULL        */
 #define EE_LZW_R_CODES_OVF	0x0003u	/* gate-X: code output overflow     */
 #define EE_LZW_R_OUTPUT_OVF	0x0004u	/* gate-X: byte output overflow     */
 #define EE_LZW_R_BAD_CODE	0x0005u	/* gate-X: code > dict_size         */
 #define EE_LZW_R_STR_OVF	0x0006u	/* gate-X: string length overflow   */
 #define EE_LZW_R_DICT_FULL	0x0007u	/* gate-0: dict at capacity         */
 #define EE_LZW_R_SELF_FAIL	0x000Fu	/* gate-0: self-test failure        */

 /*
 * ==================================================================
 * LOGOS / GATE RESULT
 * ==================================================================
 */

 /*
 * ee_lzw_gate_t
 *
 * Universal return type. Every public function returns this.
 * Gate-X fires at the structural cause -- not at the symptom.
 */
 typedef struct ee_lzw_gate {
    uint8_t		state;
    uint8_t		pad;
    uint16_t	reason;
 } ee_lzw_gate_t;

 static_assert(sizeof(ee_lzw_gate_t) == 4u, "gate must be 4 bytes");

 /*
 * ==================================================================
 * LOGOS / DICT ENTRY (I1)
 *
 * Direct-index: next[byte] = next_code or EE_LZW_EMPTY.
 * O(1). Zero collision possible. Byte IS the index.
 * ==================================================================
 */

 typedef struct ee_lzw_entry {
    uint16_t	next[EE_LZW_ALPHA_SZ];	/* 256 x 2 = 512 bytes */
 } ee_lzw_entry_t;

 static_assert(sizeof(ee_lzw_entry_t) == 512u,
              "dict entry must be 512 bytes");

 /*
 * ==================================================================
 * LOGOS / COMPRESS STATE (I2)
 * ==================================================================
 */

 typedef struct ee_lzw_cs {
    ee_lzw_entry_t	dict[EE_LZW_DICT_MAX];	/* I1 direct-index dict  */
    uint16_t	codes[EE_LZW_CODES_MAX];/* output code sequence  */
    uint32_t	dict_size;		/* current dict entries  */
    uint32_t	code_count;		/* codes emitted         */
 } ee_lzw_cs_t;

 /*
 * ==================================================================
 * LOGOS / DECOMPRESS STATE (I3)
 * ==================================================================
 */

 typedef struct ee_lzw_str {
    uint8_t		data[EE_LZW_STR_MAX];
    uint16_t	len;
 } ee_lzw_str_t;

 static_assert(sizeof(ee_lzw_str_t) == EE_LZW_STR_MAX + 2u,
              "lzw_str_t must be EE_LZW_STR_MAX + 2 bytes");

 typedef struct ee_lzw_ds {
    ee_lzw_str_t	str[EE_LZW_DICT_MAX];
    uint8_t		output[EE_LZW_OUTPUT_MAX];
    uint32_t	dict_size;
    uint32_t	out_len;
 } ee_lzw_ds_t;

 /*
 * ==================================================================
 * LOGOS COMPLETE. LOGIC begins below.
 * ==================================================================
 */

 static inline ee_lzw_gate_t
 lzw__ok(void)
 {
    return (ee_lzw_gate_t){ EE_GATE_1, 0u, EE_LZW_OK };
 }

 static inline ee_lzw_gate_t
 lzw__gate(uint8_t state, uint16_t reason)
 {
    return (ee_lzw_gate_t){ state, 0u, reason };
 }

 /*
 * ==================================================================
 * LOGIC / COMPRESS INIT
 * ==================================================================
 */

 /*
 * lzw_compress_init
 *
 * FRONT: uninitialised cs (AS)
 * LEAD:  memset(0xFF) sets all next[] slots to EE_LZW_EMPTY;
 *        dict_size = 256; code_count = 0 (Pivot)
 * REAR:  cs ready for lzw_compress() (IS)
 *   X: cs == NULL
 *   1: ready
 *
 * ExCLisp: {{0 [ cs (AS/.\IS) cs_ready ] 1}}
 */
 static ee_lzw_gate_t
 lzw_compress_init(ee_lzw_cs_t *cs)
 {
    if (!cs)
        return lzw__gate(EE_GATE_X, EE_LZW_R_NULL);

    memset(cs->dict, 0xFF, sizeof(cs->dict));
    cs->dict_size  = EE_LZW_ALPHA_SZ;
    cs->code_count = 0u;

    return lzw__ok();
 }

 /*
 * ==================================================================
 * LOGIC / COMPRESS
 * ==================================================================
 */

 /*
 * lzw_compress
 *
 * FRONT: cs initialised; input non-NULL; len > 0 (AS)
 * LEAD:  IFP pipeline over input bytes (Pivot):
 *   DISSOLVE  seed current_code = input[0]
 *   DISCOVER  I1 lookup dict[cur].next[byte] per byte
 *   REMAP     hit -> extend cur; miss -> emit + add entry + reset
 *   REMOVE    dict-full: emit without adding (gate-0, continue)
 *   REINDEX   emit final code after loop
 * REAR:  cs->codes[0..code_count-1] = compressed output (IS)
 *   Z: len == 0
 *   X: cs or input NULL; code output overflow
 *   0: dict reached EE_LZW_DICT_MAX (entries stopped, codes continue)
 *   1: compression complete
 *
 * ExCLisp: {{0 [ (cs,input,len) (AS/.\IS) cs.codes ] 1}}
 */
 [[nodiscard]]
 static ee_lzw_gate_t
 lzw_compress(ee_lzw_cs_t *cs, const uint8_t *input, size_t len)
 {
    ee_lzw_gate_t	result;
    uint16_t	cur;
    size_t		i;

    if (!cs || !input)
        return lzw__gate(EE_GATE_X, EE_LZW_R_NULL);
    if (!len)
        return lzw__gate(EE_GATE_Z, EE_LZW_R_EMPTY);

    result = lzw__ok();
    cur    = input[0];

    for (i = 1u; i < len; i++) {
        uint8_t  byte = input[i];
        uint16_t next = cs->dict[cur].next[byte]; /* I1: O(1) */

        if (__builtin_expect(next != EE_LZW_EMPTY, 1)) {
            cur = next;
            continue;
        }

        if (__builtin_expect(cs->code_count >= EE_LZW_CODES_MAX, 0))
            return lzw__gate(EE_GATE_X, EE_LZW_R_CODES_OVF);

        cs->codes[cs->code_count++] = cur;

        if (__builtin_expect(cs->dict_size < EE_LZW_DICT_MAX, 1)) {
            cs->dict[cur].next[byte] = (uint16_t)cs->dict_size;
            cs->dict_size++;
        } else {
            result = lzw__gate(EE_GATE_0, EE_LZW_R_DICT_FULL);
        }

        cur = byte;
    }

    if (__builtin_expect(cs->code_count >= EE_LZW_CODES_MAX, 0))
        return lzw__gate(EE_GATE_X, EE_LZW_R_CODES_OVF);

    cs->codes[cs->code_count++] = cur;

    return result;
 }

 /*
 * ==================================================================
 * LOGIC / DECOMPRESS INIT
 * ==================================================================
 */

 /*
 * lzw_decompress_init
 *
 * FRONT: uninitialised ds (AS)
 * LEAD:  populate str[0..255] with single-byte literals;
 *        dict_size = 256; out_len = 0 (Pivot)
 * REAR:  ds ready for lzw_decompress() (IS)
 *   X: ds == NULL
 *   1: ready
 *
 * ExCLisp: {{0 [ ds (AS/.\IS) ds_ready ] 1}}
 */
 static ee_lzw_gate_t
 lzw_decompress_init(ee_lzw_ds_t *ds)
 {
    uint32_t i;

    if (!ds)
        return lzw__gate(EE_GATE_X, EE_LZW_R_NULL);

    for (i = 0u; i < EE_LZW_ALPHA_SZ; i++) {
        ds->str[i].data[0] = (uint8_t)i;
        ds->str[i].len     = 1u;
    }

    ds->dict_size = EE_LZW_ALPHA_SZ;
    ds->out_len   = 0u;

    return lzw__ok();
 }

 /*
 * ==================================================================
 * LOGIC / DECOMPRESS
 * ==================================================================
 */

 /*
 * lzw_decompress
 *
 * FRONT: ds initialised; codes non-NULL; count > 0 (AS)
 * LEAD:  IFP pipeline -- invert I2 (Pivot):
 *   DISSOLVE  output first code's string
 *   DISCOVER  per code: lookup or special-case if code == dict_size
 *   REMAP     known -> copy string; unknown == dict_size -> construct
 *   REMOVE    code > dict_size -> gate-X at structural cause
 *   REINDEX   add new entry = old_string + first_char(cur_string)
 * REAR:  ds->output[0..out_len-1] = decompressed bytes (IS)
 *   Z: count == 0
 *   X: ds or codes NULL; output overflow; string overflow; bad code
 *   0: dict full (entries stopped, decompression continues)
 *   1: success
 *
 * ExCLisp: {{0 [ (ds,codes,count) (AS/.\IS) ds.output ] 1}}
 */
 [[nodiscard]]
 static ee_lzw_gate_t
 lzw_decompress(ee_lzw_ds_t *ds, const uint16_t *codes, size_t count)
 {
    ee_lzw_gate_t	result;
    uint16_t	old;
    size_t		i;

    if (!ds || !codes)
        return lzw__gate(EE_GATE_X, EE_LZW_R_NULL);
    if (!count)
        return lzw__gate(EE_GATE_Z, EE_LZW_R_EMPTY);

    result = lzw__ok();
    old    = codes[0];

    if (__builtin_expect(old >= ds->dict_size, 0))
        return lzw__gate(EE_GATE_X, EE_LZW_R_BAD_CODE);

    {
        uint16_t flen = ds->str[old].len;

        if (ds->out_len + flen > EE_LZW_OUTPUT_MAX)
            return lzw__gate(EE_GATE_X, EE_LZW_R_OUTPUT_OVF);

        memcpy(ds->output + ds->out_len, ds->str[old].data, flen);
        ds->out_len += flen;
    }

    for (i = 1u; i < count; i++) {
        uint16_t	 code    = codes[i];
        const uint8_t	*cur_str;
        uint16_t	 cur_len;

        /* gate-X at structural cause: code structurally impossible */
        if (__builtin_expect(code > ds->dict_size, 0))
            return lzw__gate(EE_GATE_X, EE_LZW_R_BAD_CODE);

        if (code < ds->dict_size) {
            cur_str = ds->str[code].data;
            cur_len = ds->str[code].len;
        } else {
            /* special case: code == dict_size
             * string = old_string + old_string[0] */
            ee_lzw_str_t *ns  = &ds->str[ds->dict_size];
            uint16_t      olen = ds->str[old].len;

            if (__builtin_expect((uint32_t)olen + 1u > EE_LZW_STR_MAX, 0))
                return lzw__gate(EE_GATE_X, EE_LZW_R_STR_OVF);

            memcpy(ns->data, ds->str[old].data, olen);
            ns->data[olen] = ds->str[old].data[0];
            ns->len        = (uint16_t)(olen + 1u);
            cur_str        = ns->data;
            cur_len        = ns->len;
        }

        if (__builtin_expect(ds->out_len + cur_len > EE_LZW_OUTPUT_MAX, 0))
            return lzw__gate(EE_GATE_X, EE_LZW_R_OUTPUT_OVF);

        memcpy(ds->output + ds->out_len, cur_str, cur_len);
        ds->out_len += cur_len;

        /* add new entry: old_string + first_char(cur_string) */
        if (__builtin_expect(ds->dict_size < EE_LZW_DICT_MAX, 1)) {
            ee_lzw_str_t *ne  = &ds->str[ds->dict_size];
            uint16_t      olen = ds->str[old].len;

            if ((uint32_t)olen + 1u <= EE_LZW_STR_MAX) {
                memcpy(ne->data, ds->str[old].data, olen);
                ne->data[olen] = cur_str[0];
                ne->len        = (uint16_t)(olen + 1u);
                ds->dict_size++;
            }
        } else {
            result = lzw__gate(EE_GATE_0, EE_LZW_R_DICT_FULL);
        }

        old = code;
    }

    return result;
 }

 /*
 * ==================================================================
 * LOGIC / TIMING (integer nanoseconds -- no IEEE 754)
 * ==================================================================
 */

 static uint64_t
 lzw_now_ns(void)
 {
    struct timespec ts;

    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
        return 0u;

    return (uint64_t)ts.tv_sec * UINT64_C(1000000000)
    + (uint64_t)ts.tv_nsec;
 }

 /*
 * ==================================================================
 * LOGIC / SELF-TEST (ppo-2: SELF-TESTS AT STARTUP)
 * ==================================================================
 */

 /* file-scope to avoid 2MB+ stack allocation */
 static ee_lzw_cs_t g_self_cs;
 static ee_lzw_ds_t g_self_ds;

 /*
 * lzw_self_test
 *
 * FRONT: no external state (AS)
 * LEAD:  nine checks against hand-derivable values (Pivot)
 * REAR:  gate-1 all pass; gate-0 + stderr on failure (IS)
 *   0: any check failed
 *   1: all nine passed
 *
 * Checks:
 *   1. sizeof(ee_lzw_gate_t)  == 4
 *   2. sizeof(ee_lzw_entry_t) == 512
 *   3. EE_LZW_EMPTY == 0xFFFF
 *   4. EE_LZW_EMPTY > EE_LZW_DICT_MAX - 1  (sentinel valid)
 *   5. compress("A") -> codes = {65}
 *   6. compress("AB") -> codes = {65, 66}
 *   7. after compress("AB"): dict['A']['B'] == 256  (I1 spot-check)
 *   8. round-trip "TOBEORNOT" correct length
 *   9. round-trip "TOBEORNOT" correct content
 *
 * ExCLisp: {{0 [ void (AS/.\IS) gate_1_or_0 ] 1}}
 */
 [[nodiscard]]
 static ee_lzw_gate_t
 lzw_self_test(void)
 {
    #define LZW_ST(expr, msg)					\
    do {							\
        if (!(expr)) {					\
            fprintf(stderr,				\
            "[lzw_self_test] FAIL: %s\n",	\
            (msg));				\
            return lzw__gate(EE_GATE_0,		\
            EE_LZW_R_SELF_FAIL);		\
        }						\
    } while (0)

    /* 1-4: structural */
    LZW_ST(sizeof(ee_lzw_gate_t)  == 4u,   "sizeof gate == 4");
    LZW_ST(sizeof(ee_lzw_entry_t) == 512u, "sizeof entry == 512");
    LZW_ST(EE_LZW_EMPTY == 0xFFFFu,        "sentinel == 0xFFFF");
    LZW_ST(EE_LZW_EMPTY > EE_LZW_DICT_MAX - 1u,
           "sentinel > max valid code");

    /* 5: compress("A") -> {65} */
    lzw_compress_init(&g_self_cs);
    {
        ee_lzw_gate_t g_ = lzw_compress(&g_self_cs,
                                        (const uint8_t *)"A", 1u);
        LZW_ST(g_.state == EE_GATE_1, "compress A gate");
    }
    LZW_ST(g_self_cs.code_count == 1u, "compress A: one code");
    LZW_ST(g_self_cs.codes[0]   == 65u, "compress A: code == 65");

    /* 6: compress("AB") -> {65, 66} */
    lzw_compress_init(&g_self_cs);
    {
        ee_lzw_gate_t g_ = lzw_compress(&g_self_cs,
                                        (const uint8_t *)"AB", 2u);
        LZW_ST(g_.state == EE_GATE_1, "compress AB gate");
    }
    LZW_ST(g_self_cs.code_count == 2u,  "compress AB: two codes");
    LZW_ST(g_self_cs.codes[0]   == 65u, "compress AB: code[0] == 65");
    LZW_ST(g_self_cs.codes[1]   == 66u, "compress AB: code[1] == 66");

    /* 7: I1 spot-check: dict['A']['B'] == 256 after "AB" */
    LZW_ST(g_self_cs.dict[65].next[66] == 256u,
           "dict['A']['B'] == 256 after AB");

    /* 8-9: round-trip "TOBEORNOT" */
    lzw_compress_init(&g_self_cs);
    {
        ee_lzw_gate_t g_ = lzw_compress(&g_self_cs,
                                        (const uint8_t *)"TOBEORNOT", 9u);
        LZW_ST(g_.state == EE_GATE_1, "compress TOBEORNOT gate");
    }
    lzw_decompress_init(&g_self_ds);
    {
        ee_lzw_gate_t g_ = lzw_decompress(&g_self_ds,
                                          g_self_cs.codes, g_self_cs.code_count);
        LZW_ST(g_.state == EE_GATE_1, "decompress TOBEORNOT gate");
    }
    g_self_ds.output[g_self_ds.out_len] = '\0';
    LZW_ST(g_self_ds.out_len == 9u, "round-trip TOBEORNOT: len");
    LZW_ST(memcmp(g_self_ds.output,
                  (const uint8_t *)"TOBEORNOT", 9u) == 0,
           "round-trip TOBEORNOT: content");

    #undef LZW_ST
    return lzw__ok();
 }

 /*
 * ==================================================================
 * LOGIC / TEST SUITE
 * ==================================================================
 */

 typedef struct lzw_test {
    const char *name;
    const char *input;
 } lzw_test_t;

 static const lzw_test_t lzw_tests[] = {
    { "TOBEORNOTTOBEORTOBEORNOT",
        "TOBEORNOTTOBEORTOBEORNOT"                      },
        { "ABABABA",
            "ABABABA"                                        },
            { "single char A",
                "A"                                              },
                { "two chars AB",
                    "AB"                                             },
                    { "ABCABCABC",
                        "ABCABCABC"                                      },
                        { "fox sentence",
                            "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG."   },
                            { "digits 12312312312345",
                                "12312312312345"                                 },
                                { "Hello World round-trip",
                                    "Hello, World! Hello, Programming!"              },
 };

 static const size_t lzw_test_count =
 sizeof(lzw_tests) / sizeof(lzw_tests[0]);

 /* file-scope to avoid 2MB+ stack allocation in test runner */
 static ee_lzw_cs_t g_test_cs;
 static ee_lzw_ds_t g_test_ds;

 /*
 * lzw_run_test
 *
 * FRONT: test with name and input string (AS)
 * LEAD:  compress then decompress; compare to input (Pivot)
 * REAR:  printed result line; true if round-trip correct (IS)
 *   1: round-trip correct
 *   0: mismatch or gate-X (false returned)
 *
 * ExCLisp: {{0 [ test (AS/.\IS) bool ] 1}}
 */
 static bool
 lzw_run_test(const lzw_test_t *test)
 {
    size_t ilen = strlen(test->input);

    lzw_compress_init(&g_test_cs);

    ee_lzw_gate_t cg = lzw_compress(&g_test_cs,
                                    (const uint8_t *)test->input, ilen);

    if (cg.state == EE_GATE_X) {
        printf("  FAIL [%s]: compress gate-X 0x%04x\n",
               test->name, cg.reason);
        return false;
    }

    lzw_decompress_init(&g_test_ds);

    ee_lzw_gate_t dg = lzw_decompress(&g_test_ds,
                                      g_test_cs.codes, g_test_cs.code_count);

    if (dg.state == EE_GATE_X) {
        printf("  FAIL [%s]: decompress gate-X 0x%04x\n",
               test->name, dg.reason);
        return false;
    }

    g_test_ds.output[g_test_ds.out_len] = '\0';

    if (g_test_ds.out_len != ilen ||
        memcmp(g_test_ds.output, test->input, ilen) != 0) {
        printf("  FAIL [%s]: round-trip mismatch\n", test->name);
    return false;
        }

        printf("  PASS [%s]: %zu -> %u codes -> %u bytes\n",
               test->name,
               ilen,
               g_test_cs.code_count,
               g_test_ds.out_len);

        return true;
 }

 /*
 * ==================================================================
 * LOGIC / INTERACTIVE MODE
 * ==================================================================
 */

 /* file-scope to avoid 2MB+ stack allocation in interactive */
 static ee_lzw_cs_t g_int_cs;
 static ee_lzw_ds_t g_int_ds;

 /*
 * lzw_interactive
 *
 * FRONT: stdin open (AS)
 * LEAD:  read lines; ==comp / ==decomp switch mode;
 *        compress or decompress input (Pivot)
 * REAR:  interactive session runs until EOF (IS)
 *   1: always (session complete on EOF)
 *
 * ExCLisp: {{0 [ void (AS/.\IS) void ] 1}}
 */
 static void
 lzw_interactive(void)
 {
    char	 line[EE_LZW_CODES_MAX];
    uint8_t  mode = 0u;	/* 0=none 1=compress 2=decompress */

    printf("LZW Interactive Mode\n");
    printf("Commands: ==comp, ==decomp, or type text\n");
    printf("Ctrl+C to exit\n\n");

    while (fgets(line, (int)sizeof(line), stdin)) {
        line[strcspn(line, "\n")] = '\0';

        if (strncmp(line, "==comp", 6) == 0) {
            mode = 1u;
            printf("Compression mode\n");
            continue;
        }

        if (strncmp(line, "==decomp", 8) == 0) {
            mode = 2u;
            printf("Decompression mode\n");
            continue;
        }

        if (mode == 1u) {
            lzw_compress_init(&g_int_cs);

            uint64_t      t0 = lzw_now_ns();
            ee_lzw_gate_t g  = lzw_compress(&g_int_cs,
                                            (const uint8_t *)line, strlen(line));
            uint64_t      ns = lzw_now_ns() - t0;

            if (g.state == EE_GATE_X) {
                printf("gate-X: 0x%04x\n", g.reason);
                continue;
            }

            for (uint32_t i = 0u; i < g_int_cs.code_count; i++) {
                printf("%u", g_int_cs.codes[i]);
                if (i < g_int_cs.code_count - 1u)
                    putchar(',');
            }
            putchar('\n');
            printf("// %zu chars -> %u codes  %" PRIu64 " ns\n",
                   strlen(line), g_int_cs.code_count, ns);

        } else if (mode == 2u) {
            uint16_t codes[EE_LZW_CODES_MAX];
            uint32_t count = 0u;
            char    *tok   = strtok(line, ",");

            while (tok && count < EE_LZW_CODES_MAX) {
                codes[count++] = (uint16_t)atoi(tok);
                tok = strtok(NULL, ",");
            }

            lzw_decompress_init(&g_int_ds);

            uint64_t      t0 = lzw_now_ns();
            ee_lzw_gate_t g  = lzw_decompress(&g_int_ds,
                                              codes, count);
            uint64_t      ns = lzw_now_ns() - t0;

            if (g.state == EE_GATE_X) {
                printf("gate-X: 0x%04x\n", g.reason);
                continue;
            }

            g_int_ds.output[g_int_ds.out_len] = '\0';
            printf("%s\n", g_int_ds.output);
            printf("// %u codes -> %u bytes  %" PRIu64 " ns\n",
                   count, g_int_ds.out_len, ns);
        }
    }
 }

 /*
 * ==================================================================
 * LOGIC / MAIN
 * ==================================================================
 */

 int
 main(int argc, char **argv)
 {
    printf("LZW -- ppo-2 C23 v2.1.0\n\n");

    /* ppo-2: self-tests fire before any external interaction */
    printf("[SELF-TEST] LZW (9 checks)... ");
    fflush(stdout);

    ee_lzw_gate_t st = lzw_self_test();

    if (st.state != EE_GATE_1) {
        fprintf(stderr, "FAIL (0x%04x)\n", st.reason);
        return 1;
    }
    printf("PASS\n\n");

    if (argc > 1 && strcmp(argv[1], "test") == 0) {
        printf("Test suite (%zu cases):\n\n", lzw_test_count);

        uint64_t t0     = lzw_now_ns();
        int      passed = 0;

        for (size_t i = 0u; i < lzw_test_count; i++)
            if (lzw_run_test(&lzw_tests[i]))
                passed++;

        uint64_t elapsed = lzw_now_ns() - t0;

        printf("\nResults: %d/%zu passed"
        "  %" PRIu64 " ns  gate-1 = ALLOW\n",
        passed, lzw_test_count, elapsed);

        return passed == (int)lzw_test_count ? 0 : 1;
    }

    lzw_interactive();
    return 0;
 }
No results found