Last active
March 20, 2026 22:38
-
-
Save opsec-ee/70795de110113bf8aef2ce128dfdd416 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * lzw.c | |
| * Euman = ∫(optimization/time) △ performance | |
| * | |
| * @file lzw.c @version 2.1.0 | |
| * @author H. Overman (ee) | |
| * @created 2026-03-20 | |
| * @modified 2026-03-20 | |
| * @by H. Overman (ee) | |
| * @brief LZW compression / decompression. C23 | |
| * Direct-index dict: dict[code].next[byte] = next_code. | |
| * O(1) lookup. Zero collision possible. | |
| * Sentinel EE_LZW_EMPTY = 0xFFFF (exceeds all valid codes). | |
| * Self-tests at startup. Gate-X at structural cause. | |
| * | |
| * Build (release): gcc -O3 -march=native -flto -funroll-loops -DNDEBUG | |
| * -Wall -Wextra -Wpedantic lzw.c -o lzw | |
| * Build (ASan): gcc -fsanitize=address,undefined -O1 -g | |
| * -Wall -Wextra -Wpedantic lzw.c -o lzw | |
| * | |
| * Usage: | |
| * ./lzw test -- run self-tests and test suite | |
| * ./lzw -- interactive (==comp / ==decomp) | |
| * | |
| * | |
| * ======================================================================= | |
| * IFP INVARIANTS (causal order) | |
| * ======================================================================= | |
| * | |
| * I1: dict(code, byte) -> next_code | |
| * Direct array: ee_lzw_entry_t.next[byte]. O(1). Zero collision. | |
| * Sentinel: EE_LZW_EMPTY = 0xFFFF. Never a valid code since | |
| * EE_LZW_DICT_MAX = 4096 < 0xFFFF. | |
| * Spot-check: 'A'=0x41, next['B'=0x42] = 256 after first "AB" | |
| * added. dict[0x41].next[0x42] == 256. Verified in self-test. | |
| * | |
| * I2: compress(input, len) -> {codes[], code_count} | |
| * Walk input. I1 lookup each byte. | |
| * Hit: extend current code. Miss: emit, add entry, reset. | |
| * Emit final code after loop. | |
| * | |
| * I3: decompress(codes[], count) -> {output[], out_len} | |
| * Rebuild dict incrementally. Invert I2 exactly. | |
| * Special case: code == dict_size (string ends with own first char). | |
| * | |
| * I4: round-trip invariant | |
| * decompress(compress(s)) == s for all valid inputs. | |
| * Verified by self-test checks 7-8. | |
| * | |
| * ======================================================================= | |
| * DICT STRUCTURE | |
| * ======================================================================= | |
| * | |
| * Direct-index: next[byte] = next_code or EE_LZW_EMPTY. | |
| * Key = byte. Value = next_code. No hash. No probe. No collision. | |
| * sizeof(ee_lzw_entry_t) = 256 * 2 = 512 bytes per entry. | |
| * Total dict = 4096 * 512 = 2 097 152 bytes = 2 MB. | |
| * | |
| * Derivation (python3): | |
| * entries = 4096; entry_sz = 256 * 2; total = entries * entry_sz | |
| * print(total) # -> 2097152 | |
| * Spot-check: 4096 * 512 = 2 097 152. Exact. | |
| * | |
| * ======================================================================= | |
| * SENTINEL DERIVATION | |
| * ======================================================================= | |
| * | |
| * EE_LZW_EMPTY = 0xFFFF = 65535. | |
| * EE_LZW_DICT_MAX = 4096. Valid codes: [0, 4095]. | |
| * 65535 > 4095. Never a valid code. Unambiguous empty marker. | |
| * 0xFF fill (memset) sets every uint16_t slot to 0xFFFF. One call. | |
| * Using 0 as sentinel (original) was wrong: code 0 = NUL byte, | |
| * a valid single-character literal. 0xFFFF is structurally safe. | |
| * | |
| * ======================================================================= | |
| * ACKNOWLEDGMENTS | |
| * ======================================================================= | |
| * | |
| * H. Overman (ee): | |
| * ppo-2 C23 systems programming standard. 4-state gate Z/X/0/1. | |
| * FRONT/LEAD/REAR. IFP. ExCLisp. Direct-index dict. | |
| * EE_LZW_EMPTY sentinel design. Linux kernel style. | |
| * | |
| * Terry Welch (IEEE Computer, June 1984): | |
| * "A Technique for High-Performance Data Compression." | |
| * LZW algorithm. Dictionary-based compression. | |
| * | |
| * Abraham Lempel / Jacob Ziv (1977, 1978): | |
| * LZ77 / LZ78 -- precursors to LZW. | |
| * | |
| * ================================================================== | |
| */ | |
| #define _POSIX_C_SOURCE 200809L | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <stdbool.h> | |
| #include <inttypes.h> | |
| #include <time.h> | |
| /* | |
| * ================================================================== | |
| * LOGOS / CONSTANTS | |
| * ================================================================== | |
| */ | |
| /* | |
| * EE_LZW_DICT_MAX = 4096 = 2^12. | |
| * Standard LZW 12-bit code space. Codes [0,255]: literals. | |
| * Codes [256, 4095]: runtime extensions. 3840 extension slots. | |
| */ | |
| #define EE_LZW_DICT_MAX 4096u | |
| #define EE_LZW_ALPHA_SZ 256u /* byte alphabet 2^8 */ | |
| #define EE_LZW_CODES_MAX 10000u /* max codes per compress pass */ | |
| #define EE_LZW_OUTPUT_MAX 20000u /* max bytes per decompress pass */ | |
| #define EE_LZW_STR_MAX 256u /* max length of one dict string */ | |
| /* | |
| * EE_LZW_EMPTY = 0xFFFF = 65535. | |
| * Derivation: EE_LZW_DICT_MAX = 4096. Valid codes [0, 4095]. | |
| * 0xFFFF = 65535 > 4095. Never a valid code. Safe sentinel. | |
| * memset(dict, 0xFF, ...) initialises all slots to 0xFFFF in one call. | |
| */ | |
| #define EE_LZW_EMPTY 0xFFFFu | |
| static_assert(EE_LZW_DICT_MAX == 4096u, | |
| "dict max must be 4096"); | |
| static_assert(EE_LZW_ALPHA_SZ == 256u, | |
| "alpha must be 256"); | |
| static_assert(EE_LZW_EMPTY > EE_LZW_DICT_MAX - 1u, | |
| "sentinel must exceed all valid codes"); | |
| static_assert(EE_LZW_DICT_MAX * EE_LZW_ALPHA_SZ * sizeof(uint16_t) == 2097152u, | |
| "dict total size must be 2097152 bytes"); | |
| /* | |
| * ================================================================== | |
| * LOGOS / 4-STATE GATE CONSTANTS | |
| * ================================================================== | |
| */ | |
| #define EE_GATE_Z 0x00u /* not applicable: empty input */ | |
| #define EE_GATE_X 0x01u /* error: overflow, invalid code, NULL ptr */ | |
| #define EE_GATE_0 0x02u /* deny: invariant violation */ | |
| #define EE_GATE_1 0x03u /* allow: success */ | |
| static_assert(EE_GATE_Z != EE_GATE_X && | |
| EE_GATE_X != EE_GATE_0 && | |
| EE_GATE_0 != EE_GATE_1, | |
| "gate constants must be distinct"); | |
| /* | |
| * ================================================================== | |
| * LOGOS / REASON CODES | |
| * ================================================================== | |
| */ | |
| #define EE_LZW_OK 0x0000u | |
| #define EE_LZW_R_EMPTY 0x0001u /* gate-Z: empty input */ | |
| #define EE_LZW_R_NULL 0x0002u /* gate-X: required ptr NULL */ | |
| #define EE_LZW_R_CODES_OVF 0x0003u /* gate-X: code output overflow */ | |
| #define EE_LZW_R_OUTPUT_OVF 0x0004u /* gate-X: byte output overflow */ | |
| #define EE_LZW_R_BAD_CODE 0x0005u /* gate-X: code > dict_size */ | |
| #define EE_LZW_R_STR_OVF 0x0006u /* gate-X: string length overflow */ | |
| #define EE_LZW_R_DICT_FULL 0x0007u /* gate-0: dict at capacity */ | |
| #define EE_LZW_R_SELF_FAIL 0x000Fu /* gate-0: self-test failure */ | |
| /* | |
| * ================================================================== | |
| * LOGOS / GATE RESULT | |
| * ================================================================== | |
| */ | |
| /* | |
| * ee_lzw_gate_t | |
| * | |
| * Universal return type. Every public function returns this. | |
| * Gate-X fires at the structural cause -- not at the symptom. | |
| */ | |
| typedef struct ee_lzw_gate { | |
| uint8_t state; | |
| uint8_t pad; | |
| uint16_t reason; | |
| } ee_lzw_gate_t; | |
| static_assert(sizeof(ee_lzw_gate_t) == 4u, "gate must be 4 bytes"); | |
| /* | |
| * ================================================================== | |
| * LOGOS / DICT ENTRY (I1) | |
| * | |
| * Direct-index: next[byte] = next_code or EE_LZW_EMPTY. | |
| * O(1). Zero collision possible. Byte IS the index. | |
| * ================================================================== | |
| */ | |
| typedef struct ee_lzw_entry { | |
| uint16_t next[EE_LZW_ALPHA_SZ]; /* 256 x 2 = 512 bytes */ | |
| } ee_lzw_entry_t; | |
| static_assert(sizeof(ee_lzw_entry_t) == 512u, | |
| "dict entry must be 512 bytes"); | |
| /* | |
| * ================================================================== | |
| * LOGOS / COMPRESS STATE (I2) | |
| * ================================================================== | |
| */ | |
| typedef struct ee_lzw_cs { | |
| ee_lzw_entry_t dict[EE_LZW_DICT_MAX]; /* I1 direct-index dict */ | |
| uint16_t codes[EE_LZW_CODES_MAX];/* output code sequence */ | |
| uint32_t dict_size; /* current dict entries */ | |
| uint32_t code_count; /* codes emitted */ | |
| } ee_lzw_cs_t; | |
| /* | |
| * ================================================================== | |
| * LOGOS / DECOMPRESS STATE (I3) | |
| * ================================================================== | |
| */ | |
| typedef struct ee_lzw_str { | |
| uint8_t data[EE_LZW_STR_MAX]; | |
| uint16_t len; | |
| } ee_lzw_str_t; | |
| static_assert(sizeof(ee_lzw_str_t) == EE_LZW_STR_MAX + 2u, | |
| "lzw_str_t must be EE_LZW_STR_MAX + 2 bytes"); | |
| typedef struct ee_lzw_ds { | |
| ee_lzw_str_t str[EE_LZW_DICT_MAX]; | |
| uint8_t output[EE_LZW_OUTPUT_MAX]; | |
| uint32_t dict_size; | |
| uint32_t out_len; | |
| } ee_lzw_ds_t; | |
| /* | |
| * ================================================================== | |
| * LOGOS COMPLETE. LOGIC begins below. | |
| * ================================================================== | |
| */ | |
| static inline ee_lzw_gate_t | |
| lzw__ok(void) | |
| { | |
| return (ee_lzw_gate_t){ EE_GATE_1, 0u, EE_LZW_OK }; | |
| } | |
| static inline ee_lzw_gate_t | |
| lzw__gate(uint8_t state, uint16_t reason) | |
| { | |
| return (ee_lzw_gate_t){ state, 0u, reason }; | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / COMPRESS INIT | |
| * ================================================================== | |
| */ | |
| /* | |
| * lzw_compress_init | |
| * | |
| * FRONT: uninitialised cs (AS) | |
| * LEAD: memset(0xFF) sets all next[] slots to EE_LZW_EMPTY; | |
| * dict_size = 256; code_count = 0 (Pivot) | |
| * REAR: cs ready for lzw_compress() (IS) | |
| * X: cs == NULL | |
| * 1: ready | |
| * | |
| * ExCLisp: {{0 [ cs (AS/.\IS) cs_ready ] 1}} | |
| */ | |
| static ee_lzw_gate_t | |
| lzw_compress_init(ee_lzw_cs_t *cs) | |
| { | |
| if (!cs) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_NULL); | |
| memset(cs->dict, 0xFF, sizeof(cs->dict)); | |
| cs->dict_size = EE_LZW_ALPHA_SZ; | |
| cs->code_count = 0u; | |
| return lzw__ok(); | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / COMPRESS | |
| * ================================================================== | |
| */ | |
| /* | |
| * lzw_compress | |
| * | |
| * FRONT: cs initialised; input non-NULL; len > 0 (AS) | |
| * LEAD: IFP pipeline over input bytes (Pivot): | |
| * DISSOLVE seed current_code = input[0] | |
| * DISCOVER I1 lookup dict[cur].next[byte] per byte | |
| * REMAP hit -> extend cur; miss -> emit + add entry + reset | |
| * REMOVE dict-full: emit without adding (gate-0, continue) | |
| * REINDEX emit final code after loop | |
| * REAR: cs->codes[0..code_count-1] = compressed output (IS) | |
| * Z: len == 0 | |
| * X: cs or input NULL; code output overflow | |
| * 0: dict reached EE_LZW_DICT_MAX (entries stopped, codes continue) | |
| * 1: compression complete | |
| * | |
| * ExCLisp: {{0 [ (cs,input,len) (AS/.\IS) cs.codes ] 1}} | |
| */ | |
| [[nodiscard]] | |
| static ee_lzw_gate_t | |
| lzw_compress(ee_lzw_cs_t *cs, const uint8_t *input, size_t len) | |
| { | |
| ee_lzw_gate_t result; | |
| uint16_t cur; | |
| size_t i; | |
| if (!cs || !input) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_NULL); | |
| if (!len) | |
| return lzw__gate(EE_GATE_Z, EE_LZW_R_EMPTY); | |
| result = lzw__ok(); | |
| cur = input[0]; | |
| for (i = 1u; i < len; i++) { | |
| uint8_t byte = input[i]; | |
| uint16_t next = cs->dict[cur].next[byte]; /* I1: O(1) */ | |
| if (__builtin_expect(next != EE_LZW_EMPTY, 1)) { | |
| cur = next; | |
| continue; | |
| } | |
| if (__builtin_expect(cs->code_count >= EE_LZW_CODES_MAX, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_CODES_OVF); | |
| cs->codes[cs->code_count++] = cur; | |
| if (__builtin_expect(cs->dict_size < EE_LZW_DICT_MAX, 1)) { | |
| cs->dict[cur].next[byte] = (uint16_t)cs->dict_size; | |
| cs->dict_size++; | |
| } else { | |
| result = lzw__gate(EE_GATE_0, EE_LZW_R_DICT_FULL); | |
| } | |
| cur = byte; | |
| } | |
| if (__builtin_expect(cs->code_count >= EE_LZW_CODES_MAX, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_CODES_OVF); | |
| cs->codes[cs->code_count++] = cur; | |
| return result; | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / DECOMPRESS INIT | |
| * ================================================================== | |
| */ | |
| /* | |
| * lzw_decompress_init | |
| * | |
| * FRONT: uninitialised ds (AS) | |
| * LEAD: populate str[0..255] with single-byte literals; | |
| * dict_size = 256; out_len = 0 (Pivot) | |
| * REAR: ds ready for lzw_decompress() (IS) | |
| * X: ds == NULL | |
| * 1: ready | |
| * | |
| * ExCLisp: {{0 [ ds (AS/.\IS) ds_ready ] 1}} | |
| */ | |
| static ee_lzw_gate_t | |
| lzw_decompress_init(ee_lzw_ds_t *ds) | |
| { | |
| uint32_t i; | |
| if (!ds) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_NULL); | |
| for (i = 0u; i < EE_LZW_ALPHA_SZ; i++) { | |
| ds->str[i].data[0] = (uint8_t)i; | |
| ds->str[i].len = 1u; | |
| } | |
| ds->dict_size = EE_LZW_ALPHA_SZ; | |
| ds->out_len = 0u; | |
| return lzw__ok(); | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / DECOMPRESS | |
| * ================================================================== | |
| */ | |
| /* | |
| * lzw_decompress | |
| * | |
| * FRONT: ds initialised; codes non-NULL; count > 0 (AS) | |
| * LEAD: IFP pipeline -- invert I2 (Pivot): | |
| * DISSOLVE output first code's string | |
| * DISCOVER per code: lookup or special-case if code == dict_size | |
| * REMAP known -> copy string; unknown == dict_size -> construct | |
| * REMOVE code > dict_size -> gate-X at structural cause | |
| * REINDEX add new entry = old_string + first_char(cur_string) | |
| * REAR: ds->output[0..out_len-1] = decompressed bytes (IS) | |
| * Z: count == 0 | |
| * X: ds or codes NULL; output overflow; string overflow; bad code | |
| * 0: dict full (entries stopped, decompression continues) | |
| * 1: success | |
| * | |
| * ExCLisp: {{0 [ (ds,codes,count) (AS/.\IS) ds.output ] 1}} | |
| */ | |
| [[nodiscard]] | |
| static ee_lzw_gate_t | |
| lzw_decompress(ee_lzw_ds_t *ds, const uint16_t *codes, size_t count) | |
| { | |
| ee_lzw_gate_t result; | |
| uint16_t old; | |
| size_t i; | |
| if (!ds || !codes) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_NULL); | |
| if (!count) | |
| return lzw__gate(EE_GATE_Z, EE_LZW_R_EMPTY); | |
| result = lzw__ok(); | |
| old = codes[0]; | |
| if (__builtin_expect(old >= ds->dict_size, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_BAD_CODE); | |
| { | |
| uint16_t flen = ds->str[old].len; | |
| if (ds->out_len + flen > EE_LZW_OUTPUT_MAX) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_OUTPUT_OVF); | |
| memcpy(ds->output + ds->out_len, ds->str[old].data, flen); | |
| ds->out_len += flen; | |
| } | |
| for (i = 1u; i < count; i++) { | |
| uint16_t code = codes[i]; | |
| const uint8_t *cur_str; | |
| uint16_t cur_len; | |
| /* gate-X at structural cause: code structurally impossible */ | |
| if (__builtin_expect(code > ds->dict_size, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_BAD_CODE); | |
| if (code < ds->dict_size) { | |
| cur_str = ds->str[code].data; | |
| cur_len = ds->str[code].len; | |
| } else { | |
| /* special case: code == dict_size | |
| * string = old_string + old_string[0] */ | |
| ee_lzw_str_t *ns = &ds->str[ds->dict_size]; | |
| uint16_t olen = ds->str[old].len; | |
| if (__builtin_expect((uint32_t)olen + 1u > EE_LZW_STR_MAX, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_STR_OVF); | |
| memcpy(ns->data, ds->str[old].data, olen); | |
| ns->data[olen] = ds->str[old].data[0]; | |
| ns->len = (uint16_t)(olen + 1u); | |
| cur_str = ns->data; | |
| cur_len = ns->len; | |
| } | |
| if (__builtin_expect(ds->out_len + cur_len > EE_LZW_OUTPUT_MAX, 0)) | |
| return lzw__gate(EE_GATE_X, EE_LZW_R_OUTPUT_OVF); | |
| memcpy(ds->output + ds->out_len, cur_str, cur_len); | |
| ds->out_len += cur_len; | |
| /* add new entry: old_string + first_char(cur_string) */ | |
| if (__builtin_expect(ds->dict_size < EE_LZW_DICT_MAX, 1)) { | |
| ee_lzw_str_t *ne = &ds->str[ds->dict_size]; | |
| uint16_t olen = ds->str[old].len; | |
| if ((uint32_t)olen + 1u <= EE_LZW_STR_MAX) { | |
| memcpy(ne->data, ds->str[old].data, olen); | |
| ne->data[olen] = cur_str[0]; | |
| ne->len = (uint16_t)(olen + 1u); | |
| ds->dict_size++; | |
| } | |
| } else { | |
| result = lzw__gate(EE_GATE_0, EE_LZW_R_DICT_FULL); | |
| } | |
| old = code; | |
| } | |
| return result; | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / TIMING (integer nanoseconds -- no IEEE 754) | |
| * ================================================================== | |
| */ | |
| static uint64_t | |
| lzw_now_ns(void) | |
| { | |
| struct timespec ts; | |
| if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) | |
| return 0u; | |
| return (uint64_t)ts.tv_sec * UINT64_C(1000000000) | |
| + (uint64_t)ts.tv_nsec; | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / SELF-TEST (ppo-2: SELF-TESTS AT STARTUP) | |
| * ================================================================== | |
| */ | |
| /* file-scope to avoid 2MB+ stack allocation */ | |
| static ee_lzw_cs_t g_self_cs; | |
| static ee_lzw_ds_t g_self_ds; | |
| /* | |
| * lzw_self_test | |
| * | |
| * FRONT: no external state (AS) | |
| * LEAD: nine checks against hand-derivable values (Pivot) | |
| * REAR: gate-1 all pass; gate-0 + stderr on failure (IS) | |
| * 0: any check failed | |
| * 1: all nine passed | |
| * | |
| * Checks: | |
| * 1. sizeof(ee_lzw_gate_t) == 4 | |
| * 2. sizeof(ee_lzw_entry_t) == 512 | |
| * 3. EE_LZW_EMPTY == 0xFFFF | |
| * 4. EE_LZW_EMPTY > EE_LZW_DICT_MAX - 1 (sentinel valid) | |
| * 5. compress("A") -> codes = {65} | |
| * 6. compress("AB") -> codes = {65, 66} | |
| * 7. after compress("AB"): dict['A']['B'] == 256 (I1 spot-check) | |
| * 8. round-trip "TOBEORNOT" correct length | |
| * 9. round-trip "TOBEORNOT" correct content | |
| * | |
| * ExCLisp: {{0 [ void (AS/.\IS) gate_1_or_0 ] 1}} | |
| */ | |
| [[nodiscard]] | |
| static ee_lzw_gate_t | |
| lzw_self_test(void) | |
| { | |
| #define LZW_ST(expr, msg) \ | |
| do { \ | |
| if (!(expr)) { \ | |
| fprintf(stderr, \ | |
| "[lzw_self_test] FAIL: %s\n", \ | |
| (msg)); \ | |
| return lzw__gate(EE_GATE_0, \ | |
| EE_LZW_R_SELF_FAIL); \ | |
| } \ | |
| } while (0) | |
| /* 1-4: structural */ | |
| LZW_ST(sizeof(ee_lzw_gate_t) == 4u, "sizeof gate == 4"); | |
| LZW_ST(sizeof(ee_lzw_entry_t) == 512u, "sizeof entry == 512"); | |
| LZW_ST(EE_LZW_EMPTY == 0xFFFFu, "sentinel == 0xFFFF"); | |
| LZW_ST(EE_LZW_EMPTY > EE_LZW_DICT_MAX - 1u, | |
| "sentinel > max valid code"); | |
| /* 5: compress("A") -> {65} */ | |
| lzw_compress_init(&g_self_cs); | |
| { | |
| ee_lzw_gate_t g_ = lzw_compress(&g_self_cs, | |
| (const uint8_t *)"A", 1u); | |
| LZW_ST(g_.state == EE_GATE_1, "compress A gate"); | |
| } | |
| LZW_ST(g_self_cs.code_count == 1u, "compress A: one code"); | |
| LZW_ST(g_self_cs.codes[0] == 65u, "compress A: code == 65"); | |
| /* 6: compress("AB") -> {65, 66} */ | |
| lzw_compress_init(&g_self_cs); | |
| { | |
| ee_lzw_gate_t g_ = lzw_compress(&g_self_cs, | |
| (const uint8_t *)"AB", 2u); | |
| LZW_ST(g_.state == EE_GATE_1, "compress AB gate"); | |
| } | |
| LZW_ST(g_self_cs.code_count == 2u, "compress AB: two codes"); | |
| LZW_ST(g_self_cs.codes[0] == 65u, "compress AB: code[0] == 65"); | |
| LZW_ST(g_self_cs.codes[1] == 66u, "compress AB: code[1] == 66"); | |
| /* 7: I1 spot-check: dict['A']['B'] == 256 after "AB" */ | |
| LZW_ST(g_self_cs.dict[65].next[66] == 256u, | |
| "dict['A']['B'] == 256 after AB"); | |
| /* 8-9: round-trip "TOBEORNOT" */ | |
| lzw_compress_init(&g_self_cs); | |
| { | |
| ee_lzw_gate_t g_ = lzw_compress(&g_self_cs, | |
| (const uint8_t *)"TOBEORNOT", 9u); | |
| LZW_ST(g_.state == EE_GATE_1, "compress TOBEORNOT gate"); | |
| } | |
| lzw_decompress_init(&g_self_ds); | |
| { | |
| ee_lzw_gate_t g_ = lzw_decompress(&g_self_ds, | |
| g_self_cs.codes, g_self_cs.code_count); | |
| LZW_ST(g_.state == EE_GATE_1, "decompress TOBEORNOT gate"); | |
| } | |
| g_self_ds.output[g_self_ds.out_len] = '\0'; | |
| LZW_ST(g_self_ds.out_len == 9u, "round-trip TOBEORNOT: len"); | |
| LZW_ST(memcmp(g_self_ds.output, | |
| (const uint8_t *)"TOBEORNOT", 9u) == 0, | |
| "round-trip TOBEORNOT: content"); | |
| #undef LZW_ST | |
| return lzw__ok(); | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / TEST SUITE | |
| * ================================================================== | |
| */ | |
| typedef struct lzw_test { | |
| const char *name; | |
| const char *input; | |
| } lzw_test_t; | |
| static const lzw_test_t lzw_tests[] = { | |
| { "TOBEORNOTTOBEORTOBEORNOT", | |
| "TOBEORNOTTOBEORTOBEORNOT" }, | |
| { "ABABABA", | |
| "ABABABA" }, | |
| { "single char A", | |
| "A" }, | |
| { "two chars AB", | |
| "AB" }, | |
| { "ABCABCABC", | |
| "ABCABCABC" }, | |
| { "fox sentence", | |
| "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG." }, | |
| { "digits 12312312312345", | |
| "12312312312345" }, | |
| { "Hello World round-trip", | |
| "Hello, World! Hello, Programming!" }, | |
| }; | |
| static const size_t lzw_test_count = | |
| sizeof(lzw_tests) / sizeof(lzw_tests[0]); | |
| /* file-scope to avoid 2MB+ stack allocation in test runner */ | |
| static ee_lzw_cs_t g_test_cs; | |
| static ee_lzw_ds_t g_test_ds; | |
| /* | |
| * lzw_run_test | |
| * | |
| * FRONT: test with name and input string (AS) | |
| * LEAD: compress then decompress; compare to input (Pivot) | |
| * REAR: printed result line; true if round-trip correct (IS) | |
| * 1: round-trip correct | |
| * 0: mismatch or gate-X (false returned) | |
| * | |
| * ExCLisp: {{0 [ test (AS/.\IS) bool ] 1}} | |
| */ | |
| static bool | |
| lzw_run_test(const lzw_test_t *test) | |
| { | |
| size_t ilen = strlen(test->input); | |
| lzw_compress_init(&g_test_cs); | |
| ee_lzw_gate_t cg = lzw_compress(&g_test_cs, | |
| (const uint8_t *)test->input, ilen); | |
| if (cg.state == EE_GATE_X) { | |
| printf(" FAIL [%s]: compress gate-X 0x%04x\n", | |
| test->name, cg.reason); | |
| return false; | |
| } | |
| lzw_decompress_init(&g_test_ds); | |
| ee_lzw_gate_t dg = lzw_decompress(&g_test_ds, | |
| g_test_cs.codes, g_test_cs.code_count); | |
| if (dg.state == EE_GATE_X) { | |
| printf(" FAIL [%s]: decompress gate-X 0x%04x\n", | |
| test->name, dg.reason); | |
| return false; | |
| } | |
| g_test_ds.output[g_test_ds.out_len] = '\0'; | |
| if (g_test_ds.out_len != ilen || | |
| memcmp(g_test_ds.output, test->input, ilen) != 0) { | |
| printf(" FAIL [%s]: round-trip mismatch\n", test->name); | |
| return false; | |
| } | |
| printf(" PASS [%s]: %zu -> %u codes -> %u bytes\n", | |
| test->name, | |
| ilen, | |
| g_test_cs.code_count, | |
| g_test_ds.out_len); | |
| return true; | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / INTERACTIVE MODE | |
| * ================================================================== | |
| */ | |
| /* file-scope to avoid 2MB+ stack allocation in interactive */ | |
| static ee_lzw_cs_t g_int_cs; | |
| static ee_lzw_ds_t g_int_ds; | |
| /* | |
| * lzw_interactive | |
| * | |
| * FRONT: stdin open (AS) | |
| * LEAD: read lines; ==comp / ==decomp switch mode; | |
| * compress or decompress input (Pivot) | |
| * REAR: interactive session runs until EOF (IS) | |
| * 1: always (session complete on EOF) | |
| * | |
| * ExCLisp: {{0 [ void (AS/.\IS) void ] 1}} | |
| */ | |
| static void | |
| lzw_interactive(void) | |
| { | |
| char line[EE_LZW_CODES_MAX]; | |
| uint8_t mode = 0u; /* 0=none 1=compress 2=decompress */ | |
| printf("LZW Interactive Mode\n"); | |
| printf("Commands: ==comp, ==decomp, or type text\n"); | |
| printf("Ctrl+C to exit\n\n"); | |
| while (fgets(line, (int)sizeof(line), stdin)) { | |
| line[strcspn(line, "\n")] = '\0'; | |
| if (strncmp(line, "==comp", 6) == 0) { | |
| mode = 1u; | |
| printf("Compression mode\n"); | |
| continue; | |
| } | |
| if (strncmp(line, "==decomp", 8) == 0) { | |
| mode = 2u; | |
| printf("Decompression mode\n"); | |
| continue; | |
| } | |
| if (mode == 1u) { | |
| lzw_compress_init(&g_int_cs); | |
| uint64_t t0 = lzw_now_ns(); | |
| ee_lzw_gate_t g = lzw_compress(&g_int_cs, | |
| (const uint8_t *)line, strlen(line)); | |
| uint64_t ns = lzw_now_ns() - t0; | |
| if (g.state == EE_GATE_X) { | |
| printf("gate-X: 0x%04x\n", g.reason); | |
| continue; | |
| } | |
| for (uint32_t i = 0u; i < g_int_cs.code_count; i++) { | |
| printf("%u", g_int_cs.codes[i]); | |
| if (i < g_int_cs.code_count - 1u) | |
| putchar(','); | |
| } | |
| putchar('\n'); | |
| printf("// %zu chars -> %u codes %" PRIu64 " ns\n", | |
| strlen(line), g_int_cs.code_count, ns); | |
| } else if (mode == 2u) { | |
| uint16_t codes[EE_LZW_CODES_MAX]; | |
| uint32_t count = 0u; | |
| char *tok = strtok(line, ","); | |
| while (tok && count < EE_LZW_CODES_MAX) { | |
| codes[count++] = (uint16_t)atoi(tok); | |
| tok = strtok(NULL, ","); | |
| } | |
| lzw_decompress_init(&g_int_ds); | |
| uint64_t t0 = lzw_now_ns(); | |
| ee_lzw_gate_t g = lzw_decompress(&g_int_ds, | |
| codes, count); | |
| uint64_t ns = lzw_now_ns() - t0; | |
| if (g.state == EE_GATE_X) { | |
| printf("gate-X: 0x%04x\n", g.reason); | |
| continue; | |
| } | |
| g_int_ds.output[g_int_ds.out_len] = '\0'; | |
| printf("%s\n", g_int_ds.output); | |
| printf("// %u codes -> %u bytes %" PRIu64 " ns\n", | |
| count, g_int_ds.out_len, ns); | |
| } | |
| } | |
| } | |
| /* | |
| * ================================================================== | |
| * LOGIC / MAIN | |
| * ================================================================== | |
| */ | |
| int | |
| main(int argc, char **argv) | |
| { | |
| printf("LZW -- ppo-2 C23 v2.1.0\n\n"); | |
| /* ppo-2: self-tests fire before any external interaction */ | |
| printf("[SELF-TEST] LZW (9 checks)... "); | |
| fflush(stdout); | |
| ee_lzw_gate_t st = lzw_self_test(); | |
| if (st.state != EE_GATE_1) { | |
| fprintf(stderr, "FAIL (0x%04x)\n", st.reason); | |
| return 1; | |
| } | |
| printf("PASS\n\n"); | |
| if (argc > 1 && strcmp(argv[1], "test") == 0) { | |
| printf("Test suite (%zu cases):\n\n", lzw_test_count); | |
| uint64_t t0 = lzw_now_ns(); | |
| int passed = 0; | |
| for (size_t i = 0u; i < lzw_test_count; i++) | |
| if (lzw_run_test(&lzw_tests[i])) | |
| passed++; | |
| uint64_t elapsed = lzw_now_ns() - t0; | |
| printf("\nResults: %d/%zu passed" | |
| " %" PRIu64 " ns gate-1 = ALLOW\n", | |
| passed, lzw_test_count, elapsed); | |
| return passed == (int)lzw_test_count ? 0 : 1; | |
| } | |
| lzw_interactive(); | |
| return 0; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
$ ./lzw test
LZW -- ppo-2 C23 v2.1.0
[SELF-TEST] LZW (9 checks)... PASS
Test suite (8 cases):
PASS [TOBEORNOTTOBEORTOBEORNOT]: 24 -> 16 codes -> 24 bytes
PASS [ABABABA]: 7 -> 4 codes -> 7 bytes
PASS [single char A]: 1 -> 1 codes -> 1 bytes
PASS [two chars AB]: 2 -> 2 codes -> 2 bytes
PASS [ABCABCABC]: 9 -> 6 codes -> 9 bytes
PASS [fox sentence]: 44 -> 42 codes -> 44 bytes
PASS [digits 12312312312345]: 14 -> 9 codes -> 14 bytes
PASS [Hello World round-trip]: 33 -> 30 codes -> 33 bytes
Results: 8/8 passed 2915980 ns gate-1 = ALLOW
Linux. Zero warnings. 8/8. 2.9ms for all eight cases. Clean.