Skip to content

Instantly share code, notes, and snippets.

@Juddling
Created March 30, 2016 20:33
Show Gist options
  • Select an option

  • Save Juddling/c82f2a36f4d1b85aebb303a556945ce8 to your computer and use it in GitHub Desktop.

Select an option

Save Juddling/c82f2a36f4d1b85aebb303a556945ce8 to your computer and use it in GitHub Desktop.
LZ78 encoder, no decoding
import math
class Node():
def __init__(self, position, bit_string, bit):
self.position = position
self.bit_string = bit_string
self.bit = bit
self.one = None
self.zero = None
self.pointer = None
def set_child_one(self, child):
self.one = child
def set_child_zero(self, child):
self.zero = child
def get_bit_string(self):
return self.bit_string
def get_child_one(self):
return self.one
def get_child_zero(self):
return self.zero
def set_pointer(self, node):
self.pointer = node
def get_pointer(self):
return self.pointer
def get_position(self):
return self.position
def __repr__(self):
return "position (dec): " + str(self.position) + "\n" \
"pointer (dec): " + str(self.get_pointer()) + "\n" \
"bit string: " + self.bit_string + "\n" \
"bit: " + self.bit + "\n" \
"encoded (bin): " + self.get_encoded() + "\n\n"
def get_encoded(self):
return pointer_format(self.pointer, self.position) + self.bit
def pointer_length(position):
if position < 1:
return 0
length = math.ceil(math.log(position, 2))
return int(length)
def pointer_format(pointer, position):
if pointer is None:
return ''
length = pointer_length(position)
if length == 0:
return ''
formatting = '0' + str(length) + 'b'
return format(pointer, formatting)
def traverse(node, bit_string, position):
if node == None:
return None
if node.get_bit_string() == bit_string:
return node
bit = bit_string[position:position+1]
child = node.get_child_zero() if bit == "0" else node.get_child_one()
return traverse(child, bit_string, position + 1)
with open("bin2.txt", mode='r') as file: # b is important -> binary
bit_stream = file.read()
#bit_stream = "00000000000000000000000000000000000000000000000000000"
encoded = ""
tree = Node(0, "", "")
position = 1
starting_index = 0
substring_length = 1
len_bit_stream = len(bit_stream)
while starting_index < len_bit_stream:
parent = None
# keep getting longer until
while True:
substring = bit_stream[starting_index:starting_index+substring_length]
found_node = traverse(tree, substring, 0)
if found_node is not None:
parent = found_node
if starting_index + substring_length == len(bit_stream):
# we've reached the end
break
substring_length += 1
else:
break
bit = substring[substring_length-1:]
child = Node(position, substring, bit)
if parent is None:
parent = tree
child.set_pointer(parent.get_position())
#print(child)
encoded += child.get_encoded()
if bit == "0":
parent.set_child_zero(child)
else:
parent.set_child_one(child)
starting_index += substring_length
substring_length = 1
position += 1
# print (bit_stream)
# print (encoded)
if len(bit_stream) <= len(encoded):
print("Compression has not been achieved")
print("original: %d, compressed: %d" % (len(bit_stream), len(encoded)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment