@@ -741,10 +741,10 @@ impl SimilarImages {
741
741
// // Jeśli tak to zmniejsz ilość dzieci starego rodzica, dodaj ilość dzieci w nowym rodzicu i podmień rekord hashes_similarity
742
742
// // Jeśli nie to dodaj nowy rekord w hashes_similarity jak i hashes_parents z liczbą dzieci równą 1
743
743
744
- for ( index, hash_to_check) in hashes_to_check. into_iter ( ) . enumerate ( ) {
744
+ for ( index, hash_to_check) in hashes_to_check. iter ( ) . enumerate ( ) {
745
745
// Don't check for user stop too often
746
746
// Also don't add data to variables too often
747
- const CYCLES_COUNTER : usize = 100 ;
747
+ const CYCLES_COUNTER : usize = 50 ;
748
748
if index % CYCLES_COUNTER == 0 && index != 0 {
749
749
atomic_mode_counter. fetch_add ( CYCLES_COUNTER , Ordering :: Relaxed ) ;
750
750
if stop_receiver. is_some ( ) && stop_receiver. unwrap ( ) . try_recv ( ) . is_ok ( ) {
@@ -753,11 +753,6 @@ impl SimilarImages {
753
753
}
754
754
}
755
755
756
- // Hash is already used as child
757
- if hashes_similarity. contains_key ( hash_to_check) {
758
- continue ;
759
- }
760
-
761
756
let mut found_items = self
762
757
. bktree
763
758
. find ( hash_to_check, tolerance)
@@ -767,6 +762,7 @@ impl SimilarImages {
767
762
found_items. sort_unstable_by_key ( |f| f. 0 ) ;
768
763
769
764
for ( similarity, other_hash) in found_items {
765
+ // SSSTART
770
766
// Cannot use a hash that is already used as a master record (has more than 0 children)
771
767
if let Some ( children_number) = hashes_parents. get ( other_hash) {
772
768
if * children_number > 0 {
@@ -776,15 +772,32 @@ impl SimilarImages {
776
772
777
773
// If there is already record, with smaller sensitivity, then replace it
778
774
let mut need_to_add = false ;
779
- if let Some ( ( parent_hash, other_similarity) ) = hashes_similarity. get ( other_hash) {
780
- if similarity < * other_similarity {
781
- need_to_add = true ;
782
- * hashes_parents. get_mut ( parent_hash) . unwrap ( ) -= 1 ;
775
+ let mut need_to_check = false ;
776
+
777
+ // TODO replace variables from above with closures
778
+ // If the currently checked hash already has a parent, we must first check whether the similarity between them is lower than with the checked item
779
+ if let Some ( ( current_parent_hash, current_similarity_with_parent) ) = hashes_similarity. get ( hash_to_check) {
780
+ if * current_similarity_with_parent > similarity {
781
+ need_to_check = true ;
782
+
783
+ * hashes_parents. get_mut ( current_parent_hash) . unwrap ( ) -= 1 ;
784
+ hashes_similarity. remove ( hash_to_check) . unwrap ( ) ;
783
785
}
786
+ } else {
787
+ need_to_check = true ;
784
788
}
785
- // But when there is no record, just add it
786
- else {
787
- need_to_add = true
789
+
790
+ if need_to_check {
791
+ if let Some ( ( other_parent_hash, other_similarity) ) = hashes_similarity. get ( other_hash) {
792
+ if * other_similarity > similarity {
793
+ need_to_add = true ;
794
+ * hashes_parents. get_mut ( other_parent_hash) . unwrap ( ) -= 1 ;
795
+ }
796
+ }
797
+ // But when there is no record, just add it
798
+ else {
799
+ need_to_add = true
800
+ }
788
801
}
789
802
790
803
if need_to_add {
@@ -796,6 +809,7 @@ impl SimilarImages {
796
809
hashes_parents. insert ( hash_to_check, 1 ) ;
797
810
}
798
811
}
812
+ // ENND
799
813
}
800
814
}
801
815
@@ -816,68 +830,82 @@ impl SimilarImages {
816
830
}
817
831
818
832
{
819
- let mut new_hashes_parents : HashMap < & Vec < u8 > , u32 > = Default :: default ( ) ;
820
- let mut new_hashes_similarity : HashMap < & Vec < u8 > , ( & Vec < u8 > , u32 ) > = Default :: default ( ) ;
833
+ let mut hashes_parents : HashMap < & Vec < u8 > , u32 > = Default :: default ( ) ;
834
+ let mut hashes_similarity : HashMap < & Vec < u8 > , ( & Vec < u8 > , u32 ) > = Default :: default ( ) ;
821
835
let mut iter = parts. into_iter ( ) ;
822
836
// At start fill arrays with first item
823
837
// Normal algorithm would do exactly same thing, but slower, one record after one
824
- if let Some ( ( hashes_parents , hashes_similarity ) ) = iter. next ( ) {
825
- new_hashes_parents = hashes_parents ;
826
- new_hashes_similarity = hashes_similarity ;
838
+ if let Some ( ( first_hashes_parents , first_hashes_similarity ) ) = iter. next ( ) {
839
+ hashes_parents = first_hashes_parents ;
840
+ hashes_similarity = first_hashes_similarity ;
827
841
}
828
842
829
- for ( _hashes_with_parents, hashes_with_similarity) in iter {
830
- for ( hash_to_check, ( other_hash, similarity) ) in hashes_with_similarity {
831
- // Hash is already used as child
832
- if new_hashes_similarity. contains_key ( hash_to_check) {
833
- continue ;
834
- }
835
-
843
+ for ( _partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
844
+ for ( hash_to_check, ( other_hash, similarity) ) in partial_hashes_with_similarity {
845
+ // SSSTART
836
846
// Cannot use a hash that is already used as a master record (has more than 0 children)
837
- if let Some ( children_number) = new_hashes_parents . get ( other_hash) {
847
+ if let Some ( children_number) = hashes_parents . get ( other_hash) {
838
848
if * children_number > 0 {
839
849
continue ;
840
850
}
841
851
}
842
852
843
853
// If there is already record, with smaller sensitivity, then replace it
844
854
let mut need_to_add = false ;
845
- if let Some ( ( parent_hash, other_similarity) ) = new_hashes_similarity. get ( other_hash) {
846
- if similarity < * other_similarity {
847
- need_to_add = true ;
848
- * new_hashes_parents. get_mut ( parent_hash) . unwrap ( ) -= 1 ;
855
+ let mut need_to_check = false ;
856
+
857
+ // TODO replace variables from above with closures
858
+ // If the currently checked hash already has a parent, we must first check whether the similarity between them is lower than with the checked item
859
+ if let Some ( ( current_parent_hash, current_similarity_with_parent) ) = hashes_similarity. get ( hash_to_check) {
860
+ if * current_similarity_with_parent > similarity {
861
+ need_to_check = true ;
862
+
863
+ * hashes_parents. get_mut ( current_parent_hash) . unwrap ( ) -= 1 ;
864
+ hashes_similarity. remove ( hash_to_check) . unwrap ( ) ;
849
865
}
866
+ } else {
867
+ need_to_check = true ;
850
868
}
851
- // But when there is no record, just add it
852
- else {
853
- need_to_add = true
869
+
870
+ if need_to_check {
871
+ if let Some ( ( other_parent_hash, other_similarity) ) = hashes_similarity. get ( other_hash) {
872
+ if * other_similarity > similarity {
873
+ need_to_add = true ;
874
+ * hashes_parents. get_mut ( other_parent_hash) . unwrap ( ) -= 1 ;
875
+ }
876
+ }
877
+ // But when there is no record, just add it
878
+ else {
879
+ need_to_add = true
880
+ }
854
881
}
855
882
856
883
if need_to_add {
857
- new_hashes_similarity . insert ( other_hash, ( hash_to_check, similarity) ) ;
884
+ hashes_similarity . insert ( other_hash, ( hash_to_check, similarity) ) ;
858
885
859
- if let Some ( number_of_children) = new_hashes_parents . get_mut ( hash_to_check) {
886
+ if let Some ( number_of_children) = hashes_parents . get_mut ( hash_to_check) {
860
887
* number_of_children += 1 ;
861
888
} else {
862
- new_hashes_parents . insert ( hash_to_check, 1 ) ;
889
+ hashes_parents . insert ( hash_to_check, 1 ) ;
863
890
}
864
891
}
892
+ // ENND
865
893
}
866
894
}
867
895
868
896
#[ cfg( debug_assertions) ]
869
- debug_check_for_duplicated_things ( new_hashes_parents . clone ( ) , new_hashes_similarity . clone ( ) , all_hashed_images. clone ( ) , "LATTER" ) ;
897
+ debug_check_for_duplicated_things ( hashes_parents . clone ( ) , hashes_similarity . clone ( ) , all_hashed_images. clone ( ) , "LATTER" ) ;
870
898
871
899
// Collecting results
872
900
873
- for ( parent_hash, child_number) in new_hashes_parents {
901
+ for ( parent_hash, child_number) in hashes_parents {
874
902
if child_number > 0 {
875
903
let vec_fe = all_hashed_images. get ( parent_hash) . unwrap ( ) . clone ( ) ;
876
904
collected_similar_images. insert ( parent_hash. clone ( ) , vec_fe) ;
877
905
}
878
906
}
879
907
880
- for ( child_hash, ( parent_hash, similarity) ) in new_hashes_similarity {
908
+ for ( child_hash, ( parent_hash, similarity) ) in hashes_similarity {
881
909
let mut vec_fe = all_hashed_images. get ( child_hash) . unwrap ( ) . clone ( ) ;
882
910
for mut fe in & mut vec_fe {
883
911
fe. similarity = similarity;
@@ -1318,6 +1346,7 @@ pub fn test_image_conversion_speed() {
1318
1346
}
1319
1347
}
1320
1348
1349
+ #[ allow( dead_code) ]
1321
1350
fn debug_check_for_duplicated_things (
1322
1351
hashes_parents : HashMap < & Vec < u8 > , u32 > ,
1323
1352
hashes_similarity : HashMap < & Vec < u8 > , ( & Vec < u8 > , u32 ) > ,
0 commit comments