Click here to Skip to main content
15,867,686 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
When I run the "decompress()" module all I get is this error. The code takes a file path and compresses a .txt file. The compression works but not when I try to decompress the file. After compression, the .txt file becomes .bin. Any ideas would be appreciated.

import tkinter as tk
from tkinter import filedialog
import os
import heapq


class HuffmanCoding:
    def __init__(self, path):
        self.path = path
        self.heap = []
        self.codes = {}
        self.reverse_mapping = {}

    class HeapNode:
        def __init__(self, char, freq):
            self.char = char
            self.freq = freq
            self.left = None
            self.right = None

        # defining comparators less_than and equals
        def __lt__(self, other):
            return self.freq < other.freq

        def __eq__(self, other):
            if (other == None):
                return False
            if (not isinstance(other, HeapNode)):
                return False
            return self.freq == other.freq

    # functions for compression:

    def make_frequency_dict(self, text):
        frequency = {}
        for character in text:
            if not character in frequency:
                frequency[character] = 0
            frequency[character] += 1
        return frequency

    def make_heap(self, frequency):
        for key in frequency:
            node = self.HeapNode(key, frequency[key])
            heapq.heappush(self.heap, node)

    def merge_nodes(self):
        while (len(self.heap) > 1):
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)

            merged = self.HeapNode(None, node1.freq + node2.freq)
            merged.left = node1
            merged.right = node2

            heapq.heappush(self.heap, merged)

    def make_codes_helper(self, root, current_code):
        if (root == None):
            return

        if (root.char != None):
            self.codes[root.char] = current_code
            self.reverse_mapping[current_code] = root.char
            return

        self.make_codes_helper(root.left, current_code + "0")
        self.make_codes_helper(root.right, current_code + "1")

    def make_codes(self):
        root = heapq.heappop(self.heap)
        current_code = ""
        self.make_codes_helper(root, current_code)

    def get_encoded_text(self, text):
        encoded_text = ""
        for character in text:
            encoded_text += self.codes[character]
        return encoded_text

    def pad_encoded_text(self, encoded_text):
        extra_padding = 8 - len(encoded_text) % 8
        for i in range(extra_padding):
            encoded_text += "0"

        padded_info = "{0:08b}".format(extra_padding)
        encoded_text = padded_info + encoded_text
        return encoded_text

    def get_byte_array(self, padded_encoded_text):
        if len(padded_encoded_text) % 8 != 0:
            print("Encoded text not padded properly")
            exit(0)

        b = bytearray()
        for i in range(0, len(padded_encoded_text), 8):
            byte = padded_encoded_text[i:i + 8]
            b.append(int(byte, 2))
        return b

    def compress(self):
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + ".bin"

        with open(self.path, 'r+') as file, open(output_path, 'wb') as output:
            text = file.read()
            text = text.rstrip()

            frequency = self.make_frequency_dict(text)
            self.make_heap(frequency)
            self.merge_nodes()
            self.make_codes()

            encoded_text = self.get_encoded_text(text)
            padded_encoded_text = self.pad_encoded_text(encoded_text)

            b = self.get_byte_array(padded_encoded_text)
            output.write(bytes(b))

        print("Compressed")
        return output_path

    """ functions for decompression: """

    def remove_padding(self, padded_encoded_text):
        padded_info = padded_encoded_text[:8]
        extra_padding = int(padded_info, 2)

        padded_encoded_text = padded_encoded_text[8:]
        encoded_text = padded_encoded_text[:-1 * extra_padding]

        return encoded_text

    def decode_text(self, encoded_text):
        current_code = ""
        decoded_text = ""

        for bit in encoded_text:
            current_code += bit
            if current_code in self.reverse_mapping:
                character = self.reverse_mapping[current_code]
                decoded_text += character
                current_code = ""

        return decoded_text

    def decompress(self, input_path):
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + "_decompressed" + ".txt"

        with open(input_path, 'rb') as file, open(output_path, 'w') as output:
            bit_string = ""

            byte = file.read(1)
            while (len(byte) > 0):
                byte = ord(byte)
                bits = bin(byte)[2:].rjust(8, '0')
                bit_string += bits
                byte = file.read(1)

            encoded_text = self.remove_padding(bit_string)

            decompressed_text = self.decode_text(encoded_text)

            output.write(decompressed_text)

        print("Decompressed")

        return output_path


class Application(tk.Frame):
    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        self.config(background="white")
        self.path = None
        self.label_file_explorer = tk.Label(self, text="Menu", fg="blue",
                                            width="300", height="2",
                                            font="Helvetica 15 bold")
        self.label_file_explorer.pack()
        self.button_explore = tk.Button(self, text="Browse Files", fg="blue",
                                        font="Arial 15", relief=tk.GROOVE, width=20,
                                        command=self.browse_files)
        self.button_explore.pack(padx=10, pady=10)
        self.button_exit = tk.Button(self, text="Close Program", width=20,
                                     font="Arial 15", relief=tk.GROOVE,
                                     # destroy root
                                     command=self.master.destroy)
        self.button_exit.pack(padx=10, pady=10)
        self.button_compress = tk.Button(self, text="Compress", width=20,
                                         font="Arial 15", relief=tk.GROOVE,
                                         command=self.but_comp)
        self.button_compress.pack(padx=10, pady=10)
        self.button_decompress = tk.Button(self, text="Decompress", width=20,
                                           font="Arial 15", relief=tk.GROOVE,
                                           command=self.but_decomp)
        self.button_decompress.pack(padx=10, pady=10)

    def browse_files(self):
        file_name = filedialog.askopenfilename(initialdir="/",
                                               title="Select a File",
                                               filetypes=(("all files", "*.*"),
                                                          ("text files", "*.txt*")))

        if file_name == "":  # if Cancel
            return
        else:
            self.label_file_explorer.configure(text="Selected File: " + file_name)
            self.path = file_name

    def but_comp(self):
        if self.path:
            H = HuffmanCoding(self.path)
            H.compress()

    def but_decomp(self):
        if self.path:
            H = HuffmanCoding(self.path)
            H.decompress()


root = tk.Tk()
root.title("Compression Utility")
root.geometry("800x600")
app = Application(master=root)
app.mainloop()
# the last path stored in the data attribute
# of the instance of the Application class
print(app.path)


What I have tried:

def but_decomp(self):
        if self.path:
            H = HuffmanCoding(self.path)
            H.decompress(self.path)

The error does not appear but the decompressed .bin file (which is now a text file) becomes empty even though the .bin file had data.
Posted
Updated 3-Sep-21 9:25am
v2
Comments
Richard MacCutchan 28-Aug-21 9:52am    
The variable input_path is not declared anywhere.
DrDoritos546 28-Aug-21 13:06pm    
Oh, I thought I declared it in the line 'def decompress(self, input_path):'
Richard MacCutchan 28-Aug-21 13:38pm    
That is the parameter that decompress will use. But self.input_path has not been declared anywhere so decompress does not receive any value.
DrDoritos546 28-Aug-21 13:08pm    
I can't figure out a way to instantiate the variable "input_path". Can't think of a way to move the variable into the tkinter class...
Richard MacCutchan 28-Aug-21 13:40pm    
You select filenames in browse-files and set self.path, but you never set self.input_path anywhere.

1 solution

Passed a valid value through.

def but_decomp(self):
        if self.path:
            H = HuffmanCoding(self.path)
            H.decompress(input_path = self.path)
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900