When I run the "decompress()" module all I get is this error. The code takes a file path and compresses a .txt file. The compression works but not when I try to decompress the file. After compression, the .txt file becomes .bin. Any ideas would be appreciated.
import tkinter as tk
from tkinter import filedialog
import os
import heapq
class HuffmanCoding:
def __init__(self, path):
self.path = path
self.heap = []
self.codes = {}
self.reverse_mapping = {}
class HeapNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
# defining comparators less_than and equals
def __lt__(self, other):
return self.freq < other.freq
def __eq__(self, other):
if (other == None):
return False
if (not isinstance(other, HeapNode)):
return False
return self.freq == other.freq
# functions for compression:
def make_frequency_dict(self, text):
frequency = {}
for character in text:
if not character in frequency:
frequency[character] = 0
frequency[character] += 1
return frequency
def make_heap(self, frequency):
for key in frequency:
node = self.HeapNode(key, frequency[key])
heapq.heappush(self.heap, node)
def merge_nodes(self):
while (len(self.heap) > 1):
node1 = heapq.heappop(self.heap)
node2 = heapq.heappop(self.heap)
merged = self.HeapNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(self.heap, merged)
def make_codes_helper(self, root, current_code):
if (root == None):
return
if (root.char != None):
self.codes[root.char] = current_code
self.reverse_mapping[current_code] = root.char
return
self.make_codes_helper(root.left, current_code + "0")
self.make_codes_helper(root.right, current_code + "1")
def make_codes(self):
root = heapq.heappop(self.heap)
current_code = ""
self.make_codes_helper(root, current_code)
def get_encoded_text(self, text):
encoded_text = ""
for character in text:
encoded_text += self.codes[character]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += "0"
padded_info = "{0:08b}".format(extra_padding)
encoded_text = padded_info + encoded_text
return encoded_text
def get_byte_array(self, padded_encoded_text):
if len(padded_encoded_text) % 8 != 0:
print("Encoded text not padded properly")
exit(0)
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i + 8]
b.append(int(byte, 2))
return b
def compress(self):
filename, file_extension = os.path.splitext(self.path)
output_path = filename + ".bin"
with open(self.path, 'r+') as file, open(output_path, 'wb') as output:
text = file.read()
text = text.rstrip()
frequency = self.make_frequency_dict(text)
self.make_heap(frequency)
self.merge_nodes()
self.make_codes()
encoded_text = self.get_encoded_text(text)
padded_encoded_text = self.pad_encoded_text(encoded_text)
b = self.get_byte_array(padded_encoded_text)
output.write(bytes(b))
print("Compressed")
return output_path
""" functions for decompression: """
def remove_padding(self, padded_encoded_text):
padded_info = padded_encoded_text[:8]
extra_padding = int(padded_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-1 * extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_mapping:
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, input_path):
filename, file_extension = os.path.splitext(self.path)
output_path = filename + "_decompressed" + ".txt"
with open(input_path, 'rb') as file, open(output_path, 'w') as output:
bit_string = ""
byte = file.read(1)
while (len(byte) > 0):
byte = ord(byte)
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
byte = file.read(1)
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
output.write(decompressed_text)
print("Decompressed")
return output_path
class Application(tk.Frame):
def __init__(self, master=None):
super().__init__(master)
self.master = master
self.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
self.config(background="white")
self.path = None
self.label_file_explorer = tk.Label(self, text="Menu", fg="blue",
width="300", height="2",
font="Helvetica 15 bold")
self.label_file_explorer.pack()
self.button_explore = tk.Button(self, text="Browse Files", fg="blue",
font="Arial 15", relief=tk.GROOVE, width=20,
command=self.browse_files)
self.button_explore.pack(padx=10, pady=10)
self.button_exit = tk.Button(self, text="Close Program", width=20,
font="Arial 15", relief=tk.GROOVE,
# destroy root
command=self.master.destroy)
self.button_exit.pack(padx=10, pady=10)
self.button_compress = tk.Button(self, text="Compress", width=20,
font="Arial 15", relief=tk.GROOVE,
command=self.but_comp)
self.button_compress.pack(padx=10, pady=10)
self.button_decompress = tk.Button(self, text="Decompress", width=20,
font="Arial 15", relief=tk.GROOVE,
command=self.but_decomp)
self.button_decompress.pack(padx=10, pady=10)
def browse_files(self):
file_name = filedialog.askopenfilename(initialdir="/",
title="Select a File",
filetypes=(("all files", "*.*"),
("text files", "*.txt*")))
if file_name == "": # if Cancel
return
else:
self.label_file_explorer.configure(text="Selected File: " + file_name)
self.path = file_name
def but_comp(self):
if self.path:
H = HuffmanCoding(self.path)
H.compress()
def but_decomp(self):
if self.path:
H = HuffmanCoding(self.path)
H.decompress()
root = tk.Tk()
root.title("Compression Utility")
root.geometry("800x600")
app = Application(master=root)
app.mainloop()
# the last path stored in the data attribute
# of the instance of the Application class
print(app.path)
What I have tried:
def but_decomp(self):
if self.path:
H = HuffmanCoding(self.path)
H.decompress(self.path)
The error does not appear but the decompressed .bin file (which is now a text file) becomes empty even though the .bin file had data.