Skip to content

Commit 9e99e78

Browse files
author
elisemercury
committed
Revert "Update to v2.0"
This reverts commit a06fcbe.
1 parent a06fcbe commit 9e99e78

File tree

3 files changed

+145
-180
lines changed

3 files changed

+145
-180
lines changed

__init__.py

-1
This file was deleted.

dif.py

-179
This file was deleted.

dupl_image_finder.py

+145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import skimage.measure
2+
import matplotlib.pyplot as plt
3+
import numpy as np
4+
import cv2
5+
import os
6+
import imghdr
7+
8+
"""
9+
Duplicate Image Finder (DIF): function that searches a given directory for images and finds duplicate/similar images among them.
10+
Outputs the number of found duplicate/similar image pairs with a list of the filenames having lower resolution.
11+
"""
12+
13+
def compare_images(directory, show_imgs=True, similarity="high", compression=50):
    """
    Search a directory for duplicate/similar images and report, for every match,
    the file of the pair that has the smaller file size.

    directory (str).........folder to search for duplicate/similar images
    show_imgs (bool)........True = shows the duplicate/similar images found in output
                            False = doesn't show found images
    similarity (str)........"high" = searches for duplicate images, more precise
                            "low" = finds similar images
    compression (int).......recommended not to change default value
                            compression in px (height x width) of the images before being compared
                            the higher the compression i.e. the higher the pixel size, the more computational resources and time required

    Returns a set with the filenames collected by check_img_quality, i.e. the
    smaller file of each matched pair. A summary line is printed before returning.
    """
    # The helpers build file paths as `directory + filename`, so make sure the
    # directory ends with a path separator. This is a no-op when it already
    # does (or when it is the empty string).
    directory = os.path.join(directory, "")

    # lists where the found duplicate/similar images are stored
    duplicates = []
    lower_res = []

    # create_imgs_matrix also (re)fills the module-level `image_files` list
    imgs_matrix = create_imgs_matrix(directory, compression)

    # mse threshold: "low" accepts merely similar images, anything else
    # searches for 1:1 duplicates
    ref = 1000 if similarity == "low" else 200

    # every image occupies a block of `nrows` consecutive rows in imgs_matrix
    nrows, ncols = compression, compression
    total_imgs = len(image_files)

    for main_img in range(total_imgs - 1):
        main_name = image_files[main_img]
        imgA = imgs_matrix[main_img * nrows : (main_img + 1) * nrows,  # rows
                           0 : ncols]                                   # columns
        matched = False

        for compared_img in range(main_img + 1, total_imgs):
            imgB = imgs_matrix[compared_img * nrows : (compared_img + 1) * nrows,  # rows
                               0 : ncols]                                           # columns

            # compare imgA against imgB in all four 90-degree orientations
            for rotations in range(4):
                if rotations != 0:
                    imgB = rotate_img(imgB)
                err = mse(imgA, imgB)
                if err < ref:
                    if show_imgs:
                        show_img_figs(imgA, imgB, err)
                        show_file_info(compared_img, main_img)
                    add_to_list(main_name, duplicates)
                    check_img_quality(directory, main_name, image_files[compared_img], lower_res)
                    matched = True
                    break

            # a main image is reported at most once: after its first match
            # the remaining candidates are not compared against it
            if matched:
                break

    msg = "\n***\n DONE: found " + str(len(duplicates)) + " duplicate image pairs in " + str(len(image_files)) + " total images.\n The following files have lower resolution:"
    print(msg)
    return set(lower_res)
79+
80+
# Function that searches the folder for image files, converts them to a matrix
81+
def create_imgs_matrix(directory, compression):
    """
    Load every image file found directly in `directory` and stack them into one matrix.

    Each decodable image is reduced to three channels, resized to
    compression x compression pixels and stacked vertically, so image i
    occupies rows [i * compression, (i + 1) * compression) of the result.

    directory (str).........folder to read the image files from (sub-folders are skipped)
    compression (int).......edge length in px every image is resized to

    Side effect: resets the module-level list `image_files` and fills it with
    the filenames of the stacked images, in the same order as in the matrix.

    Returns the stacked matrix; when no image could be loaded, an empty matrix
    with zero rows is returned.
    """
    global image_files
    image_files = []
    resized_imgs = []

    for filename in os.listdir(directory):
        # os.path.join works whether or not `directory` ends with a separator
        path = os.path.join(directory, filename)
        if os.path.isdir(path) or not imghdr.what(path):
            continue

        # np.fromfile + imdecode instead of cv2.imread so that paths with
        # non-ASCII characters can be read as well
        img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
        if not isinstance(img, np.ndarray):
            # the file could not be decoded
            continue

        if img.ndim == 2:
            # single-channel image: replicate it so every entry has 3 channels
            img = np.stack((img,) * 3, axis=-1)
        else:
            # drop a possible alpha channel
            img = img[..., 0:3]

        img = cv2.resize(img, dsize=(compression, compression), interpolation=cv2.INTER_CUBIC)
        resized_imgs.append(img)
        image_files.append(filename)

    if not resized_imgs:
        # nothing to stack: hand back a zero-row matrix instead of failing
        return np.empty((0, compression, 3), dtype=np.uint8)

    # concatenate once at the end instead of re-copying the matrix per image
    return np.concatenate(resized_imgs)
103+
104+
# Function that calculates the mean squared error (mse) between two image matrices
105+
def mse(imageA, imageB):
    """
    Return the squared-difference sum of two image matrices divided by the
    number of pixels (rows x columns) of imageA.

    Both inputs are converted to float first, so unsigned integer pixel
    values cannot wrap around when subtracted.
    """
    difference = imageA.astype("float") - imageB.astype("float")
    squared_total = np.sum(difference ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return squared_total / pixel_count
109+
110+
# Function that plots two compared image files and their mse
111+
def show_img_figs(imageA, imageB, err):
    """
    Plot the two compared images side by side in one figure, titled with
    their mse, and display the figure.
    """
    figure = plt.figure()
    plt.suptitle("MSE: %.2f" % (err))
    # left panel: first image, right panel: second image
    for position, image in enumerate((imageA, imageB), start=1):
        figure.add_subplot(1, 2, position)
        plt.imshow(image, cmap=plt.cm.gray)
        plt.axis("off")
    # show the images
    plt.show()
124+
125+
#Function for rotating an image matrix by a 90 degree angle
126+
def rotate_img(image):
    """Return the image matrix rotated once by 90 degrees in the plane of its first two axes."""
    # np.rot90 defaults to k=1 and axes=(0, 1)
    return np.rot90(image)
129+
130+
# Function for printing filename info of plotted image files
131+
def show_file_info(compared_img, main_img):
    """
    Print the filenames of a matched image pair.

    compared_img (int)......index of the compared image in the module-level image_files list
    main_img (int)..........index of the main image in the module-level image_files list
    """
    main_name = image_files[main_img]
    compared_name = image_files[compared_img]
    print("Duplicate file: " + main_name + " and " + compared_name)
133+
134+
# Function for appending items to a list
135+
def add_to_list(filename, list):
    """
    Append `filename` to `list` in place.

    filename................item to append
    list....................target container, modified in place
    """
    # NOTE: the parameter name shadows the builtin `list`; it is kept because
    # it is part of the function's signature.
    list.append(filename)
137+
138+
# Function for checking the quality of compared images, appends the lower quality image to the list
139+
def check_img_quality(directory, imageA, imageB, list):
    """
    Compare two image files by file size and append the smaller one to `list`.

    File size in bytes is used as the measure of quality. When both files
    have the same size, imageA is appended.

    directory (str).........folder that contains both files
    imageA (str)............filename of the first image
    imageB (str)............filename of the second image
    list....................target list, modified in place
    """
    # os.path.join works whether or not `directory` ends with a separator
    size_imgA = os.stat(os.path.join(directory, imageA)).st_size
    size_imgB = os.stat(os.path.join(directory, imageB)).st_size
    if size_imgA > size_imgB:
        list.append(imageB)
    else:
        list.append(imageA)

0 commit comments

Comments
 (0)