Created
March 30, 2016 20:33
-
-
Save Juddling/c82f2a36f4d1b85aebb303a556945ce8 to your computer and use it in GitHub Desktop.
LZ78 encoder, no decoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import math | |
| class Node(): | |
| def __init__(self, position, bit_string, bit): | |
| self.position = position | |
| self.bit_string = bit_string | |
| self.bit = bit | |
| self.one = None | |
| self.zero = None | |
| self.pointer = None | |
| def set_child_one(self, child): | |
| self.one = child | |
| def set_child_zero(self, child): | |
| self.zero = child | |
| def get_bit_string(self): | |
| return self.bit_string | |
| def get_child_one(self): | |
| return self.one | |
| def get_child_zero(self): | |
| return self.zero | |
| def set_pointer(self, node): | |
| self.pointer = node | |
| def get_pointer(self): | |
| return self.pointer | |
| def get_position(self): | |
| return self.position | |
| def __repr__(self): | |
| return "position (dec): " + str(self.position) + "\n" \ | |
| "pointer (dec): " + str(self.get_pointer()) + "\n" \ | |
| "bit string: " + self.bit_string + "\n" \ | |
| "bit: " + self.bit + "\n" \ | |
| "encoded (bin): " + self.get_encoded() + "\n\n" | |
| def get_encoded(self): | |
| return pointer_format(self.pointer, self.position) + self.bit | |
| def pointer_length(position): | |
| if position < 1: | |
| return 0 | |
| length = math.ceil(math.log(position, 2)) | |
| return int(length) | |
| def pointer_format(pointer, position): | |
| if pointer is None: | |
| return '' | |
| length = pointer_length(position) | |
| if length == 0: | |
| return '' | |
| formatting = '0' + str(length) + 'b' | |
| return format(pointer, formatting) | |
| def traverse(node, bit_string, position): | |
| if node == None: | |
| return None | |
| if node.get_bit_string() == bit_string: | |
| return node | |
| bit = bit_string[position:position+1] | |
| child = node.get_child_zero() if bit == "0" else node.get_child_one() | |
| return traverse(child, bit_string, position + 1) | |
| with open("bin2.txt", mode='r') as file: # b is important -> binary | |
| bit_stream = file.read() | |
| #bit_stream = "00000000000000000000000000000000000000000000000000000" | |
| encoded = "" | |
| tree = Node(0, "", "") | |
| position = 1 | |
| starting_index = 0 | |
| substring_length = 1 | |
| len_bit_stream = len(bit_stream) | |
| while starting_index < len_bit_stream: | |
| parent = None | |
| # keep getting longer until | |
| while True: | |
| substring = bit_stream[starting_index:starting_index+substring_length] | |
| found_node = traverse(tree, substring, 0) | |
| if found_node is not None: | |
| parent = found_node | |
| if starting_index + substring_length == len(bit_stream): | |
| # we've reached the end | |
| break | |
| substring_length += 1 | |
| else: | |
| break | |
| bit = substring[substring_length-1:] | |
| child = Node(position, substring, bit) | |
| if parent is None: | |
| parent = tree | |
| child.set_pointer(parent.get_position()) | |
| #print(child) | |
| encoded += child.get_encoded() | |
| if bit == "0": | |
| parent.set_child_zero(child) | |
| else: | |
| parent.set_child_one(child) | |
| starting_index += substring_length | |
| substring_length = 1 | |
| position += 1 | |
| # print (bit_stream) | |
| # print (encoded) | |
| if len(bit_stream) <= len(encoded): | |
| print("Compression has not been achieved") | |
| print("original: %d, compressed: %d" % (len(bit_stream), len(encoded))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment