I want to compute multiple hashes for image. to speed it up I want to compute all hashes in parallel. I have method called hashImage which returns dictionary. Expected result is that all dictionary keys have values that are other than None. Actual result is that random keys have None as their value. Sometimes ret["dhash10"] is None while other times ret["averageHash10"] is None and third time something else might be None.
import time
from PIL import Image
import imagehash
import hashlib
import concurrent.futures
import numpy
import os
class ImageHasher:
def __init__(self):
print("__init__")
pass
def computeHash(self, image, hashFunction, hashSize):
try:
return hashFunction(image, hashSize)
except:
return None
def compute_md5_hash(self, filename):
with open(filename, "rb") as file:
content = file.read()
hash_object = hashlib.md5(content)
return hash_object.hexdigest()
def hashImage(self, imagePath):
ret = {}
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
with Image.open(imagePath) as image:
future_dhash10 = executor.submit(self.computeHash, image, imagehash.dhash, 10)
future_averageHash10 = executor.submit(self.computeHash, image, imagehash.average_hash, 10)
future_phash10 = executor.submit(self.computeHash, image, imagehash.phash, 10)
future_whash16 = executor.submit(self.computeHash, image, imagehash.whash, 16)
future_phashSimple8 = executor.submit(self.computeHash, image, imagehash.phash_simple, 8)
future_md5 = executor.submit(self.compute_md5_hash, imagePath)
concurrent.futures.wait(
[future_dhash10, future_averageHash10, future_phash10, future_whash16, future_phashSimple8, future_md5]
)
ret["dhash10"] = future_dhash10.result()
ret["averageHash10"] = future_averageHash10.result()
ret["phash10"] = future_phash10.result()
ret["whash16"] = future_whash16.result()
ret["phashSimple8"] = future_phashSimple8.result()
ret["md5"] = future_md5.result()
except Exception as e:
print(f"Uh oh!! {e}")
return ret
def processImage(self):
result = self.hashImage('result_image.jpg')
count = 0
while result["dhash10"] != None and result["averageHash10"] != None and result["phash10"] != None and result["whash16"] != None and result["phashSimple8"] != None and result["md5"] != None:
result = self.hashImage('result_image.jpg')
count+=1
print("count: " + str(count))
self.print_keys_with_none_values(result)
return result
def print_keys_with_none_values(self, dictionary):
print("The following key(s) are None:")
for key, value in dictionary.items():
if value is None:
print(key)
if __name__ == '__main__':
if not os.path.exists('result_image.jpg'):
imarray = numpy.random.rand(3000, 4000,3) * 255
im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
im.save('result_image.jpg',quality=95)
ih = ImageHasher()
start_time = time.time()
result = ih.processImage()
end_time = time.time()
print("Time taken:", end_time - start_time)
What I have tried:
I tried asking ChatGPT 3.5 but it was running circles with suggestions that didn't help. One of which was to use multiprocessing but turned out it did not work. It got UnpickleableError. It also suggested me to have hashImage without threading and instead call hashImage in threads. That didn't work because I later insert those hashes into sqlite database (code removed from this post to simplify code) and it does not support threads so the only way is make hashImage threaded.