Skip to content

Commit 5580892

Browse files
committed
Ending words
1 parent 905c257 commit 5580892

File tree

2 files changed

+73
-44
lines changed

2 files changed

+73
-44
lines changed

Cargo.lock

+4-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

czkawka_core/src/similar_images.rs

+69-40
Original file line numberDiff line numberDiff line change
@@ -741,10 +741,10 @@ impl SimilarImages {
741741
// // Jeśli tak to zmniejsz ilość dzieci starego rodzica, dodaj ilość dzieci w nowym rodzicu i podmień rekord hashes_similarity
742742
// // Jeśli nie to dodaj nowy rekord w hashes_similarity jak i hashes_parents z liczbą dzieci równą 1
743743

744-
for (index, hash_to_check) in hashes_to_check.into_iter().enumerate() {
744+
for (index, hash_to_check) in hashes_to_check.iter().enumerate() {
745745
// Don't check for user stop too often
746746
// Also don't add data to variables too often
747-
const CYCLES_COUNTER: usize = 100;
747+
const CYCLES_COUNTER: usize = 50;
748748
if index % CYCLES_COUNTER == 0 && index != 0 {
749749
atomic_mode_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed);
750750
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
@@ -753,11 +753,6 @@ impl SimilarImages {
753753
}
754754
}
755755

756-
// Hash is already used as child
757-
if hashes_similarity.contains_key(hash_to_check) {
758-
continue;
759-
}
760-
761756
let mut found_items = self
762757
.bktree
763758
.find(hash_to_check, tolerance)
@@ -767,6 +762,7 @@ impl SimilarImages {
767762
found_items.sort_unstable_by_key(|f| f.0);
768763

769764
for (similarity, other_hash) in found_items {
765+
// SSSTART
770766
// Cannot use a hash if it is already used as a master record (has more than 0 children)
771767
if let Some(children_number) = hashes_parents.get(other_hash) {
772768
if *children_number > 0 {
@@ -776,15 +772,32 @@ impl SimilarImages {
776772

777773
// If there is already a record with smaller sensitivity, then replace it
778774
let mut need_to_add = false;
779-
if let Some((parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
780-
if similarity < *other_similarity {
781-
need_to_add = true;
782-
*hashes_parents.get_mut(parent_hash).unwrap() -= 1;
775+
let mut need_to_check = false;
776+
777+
// TODO replace variables from above with closures
778+
// If the currently checked hash has a parent, we must first check whether the similarity between them is lower than that of the checked item
779+
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
780+
if *current_similarity_with_parent > similarity {
781+
need_to_check = true;
782+
783+
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
784+
hashes_similarity.remove(hash_to_check).unwrap();
783785
}
786+
} else {
787+
need_to_check = true;
784788
}
785-
// But when there is no record, just add it
786-
else {
787-
need_to_add = true
789+
790+
if need_to_check {
791+
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
792+
if *other_similarity > similarity {
793+
need_to_add = true;
794+
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
795+
}
796+
}
797+
// But when there is no record, just add it
798+
else {
799+
need_to_add = true
800+
}
788801
}
789802

790803
if need_to_add {
@@ -796,6 +809,7 @@ impl SimilarImages {
796809
hashes_parents.insert(hash_to_check, 1);
797810
}
798811
}
812+
// ENND
799813
}
800814
}
801815

@@ -816,68 +830,82 @@ impl SimilarImages {
816830
}
817831

818832
{
819-
let mut new_hashes_parents: HashMap<&Vec<u8>, u32> = Default::default();
820-
let mut new_hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default();
833+
let mut hashes_parents: HashMap<&Vec<u8>, u32> = Default::default();
834+
let mut hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default();
821835
let mut iter = parts.into_iter();
822836
// At the start, fill the arrays with the first item
823837
// The normal algorithm would do exactly the same thing, but slower, one record at a time
824-
if let Some((hashes_parents, hashes_similarity)) = iter.next() {
825-
new_hashes_parents = hashes_parents;
826-
new_hashes_similarity = hashes_similarity;
838+
if let Some((first_hashes_parents, first_hashes_similarity)) = iter.next() {
839+
hashes_parents = first_hashes_parents;
840+
hashes_similarity = first_hashes_similarity;
827841
}
828842

829-
for (_hashes_with_parents, hashes_with_similarity) in iter {
830-
for (hash_to_check, (other_hash, similarity)) in hashes_with_similarity {
831-
// Hash is already used as child
832-
if new_hashes_similarity.contains_key(hash_to_check) {
833-
continue;
834-
}
835-
843+
for (_partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
844+
for (hash_to_check, (other_hash, similarity)) in partial_hashes_with_similarity {
845+
// SSSTART
836846
// Cannot use a hash if it is already used as a master record (has more than 0 children)
837-
if let Some(children_number) = new_hashes_parents.get(other_hash) {
847+
if let Some(children_number) = hashes_parents.get(other_hash) {
838848
if *children_number > 0 {
839849
continue;
840850
}
841851
}
842852

843853
// If there is already a record with smaller sensitivity, then replace it
844854
let mut need_to_add = false;
845-
if let Some((parent_hash, other_similarity)) = new_hashes_similarity.get(other_hash) {
846-
if similarity < *other_similarity {
847-
need_to_add = true;
848-
*new_hashes_parents.get_mut(parent_hash).unwrap() -= 1;
855+
let mut need_to_check = false;
856+
857+
// TODO replace variables from above with closures
858+
// If the currently checked hash has a parent, we must first check whether the similarity between them is lower than that of the checked item
859+
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
860+
if *current_similarity_with_parent > similarity {
861+
need_to_check = true;
862+
863+
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
864+
hashes_similarity.remove(hash_to_check).unwrap();
849865
}
866+
} else {
867+
need_to_check = true;
850868
}
851-
// But when there is no record, just add it
852-
else {
853-
need_to_add = true
869+
870+
if need_to_check {
871+
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
872+
if *other_similarity > similarity {
873+
need_to_add = true;
874+
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
875+
}
876+
}
877+
// But when there is no record, just add it
878+
else {
879+
need_to_add = true
880+
}
854881
}
855882

856883
if need_to_add {
857-
new_hashes_similarity.insert(other_hash, (hash_to_check, similarity));
884+
hashes_similarity.insert(other_hash, (hash_to_check, similarity));
858885

859-
if let Some(number_of_children) = new_hashes_parents.get_mut(hash_to_check) {
886+
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
860887
*number_of_children += 1;
861888
} else {
862-
new_hashes_parents.insert(hash_to_check, 1);
889+
hashes_parents.insert(hash_to_check, 1);
863890
}
864891
}
892+
// ENND
865893
}
866894
}
867895

868896
#[cfg(debug_assertions)]
869-
debug_check_for_duplicated_things(new_hashes_parents.clone(), new_hashes_similarity.clone(), all_hashed_images.clone(), "LATTER");
897+
debug_check_for_duplicated_things(hashes_parents.clone(), hashes_similarity.clone(), all_hashed_images.clone(), "LATTER");
870898

871899
// Collecting results
872900

873-
for (parent_hash, child_number) in new_hashes_parents {
901+
for (parent_hash, child_number) in hashes_parents {
874902
if child_number > 0 {
875903
let vec_fe = all_hashed_images.get(parent_hash).unwrap().clone();
876904
collected_similar_images.insert(parent_hash.clone(), vec_fe);
877905
}
878906
}
879907

880-
for (child_hash, (parent_hash, similarity)) in new_hashes_similarity {
908+
for (child_hash, (parent_hash, similarity)) in hashes_similarity {
881909
let mut vec_fe = all_hashed_images.get(child_hash).unwrap().clone();
882910
for mut fe in &mut vec_fe {
883911
fe.similarity = similarity;
@@ -1318,6 +1346,7 @@ pub fn test_image_conversion_speed() {
13181346
}
13191347
}
13201348

1349+
#[allow(dead_code)]
13211350
fn debug_check_for_duplicated_things(
13221351
hashes_parents: HashMap<&Vec<u8>, u32>,
13231352
hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)>,

0 commit comments

Comments
 (0)