1
+ use std:: collections:: HashMap ;
1
2
use std:: collections:: { BTreeMap , HashSet } ;
2
3
use std:: fmt:: Debug ;
3
4
use std:: fs:: File ;
@@ -424,11 +425,14 @@ impl DuplicateFinder {
424
425
debug ! ( "prehash_load_cache_at_start - started diff between loaded and prechecked files" ) ;
425
426
for ( size, mut vec_file_entry) in mem:: take ( & mut self . files_with_identical_size ) {
426
427
if let Some ( cached_vec_file_entry) = loaded_hash_map. get ( & size) {
427
- // TODO maybe hashset is not needed when using < 4 elements
428
- let cached_path_entries = cached_vec_file_entry. iter ( ) . map ( |e| & e. path ) . collect :: < HashSet < _ > > ( ) ;
428
+ // TODO: the HashMap may be unnecessary when there are fewer than 4 elements
429
+ let mut cached_path_entries: HashMap < & Path , FileEntry > = HashMap :: new ( ) ;
430
+ for file_entry in cached_vec_file_entry {
431
+ cached_path_entries. insert ( & file_entry. path , file_entry. clone ( ) ) ;
432
+ }
429
433
for file_entry in vec_file_entry {
430
- if cached_path_entries. contains ( & file_entry. path ) {
431
- records_already_cached. entry ( size) . or_default ( ) . push ( file_entry ) ;
434
+ if let Some ( cached_file_entry ) = cached_path_entries. remove ( file_entry. path . as_path ( ) ) {
435
+ records_already_cached. entry ( size) . or_default ( ) . push ( cached_file_entry ) ;
432
436
} else {
433
437
non_cached_files_to_check. entry ( size) . or_default ( ) . push ( file_entry) ;
434
438
}
@@ -508,7 +512,7 @@ impl DuplicateFinder {
508
512
debug ! ( "Starting calculating prehash" ) ;
509
513
#[ allow( clippy:: type_complexity) ]
510
514
let pre_hash_results: Vec < ( u64 , BTreeMap < String , Vec < FileEntry > > , Vec < String > ) > = non_cached_files_to_check
511
- . par_iter ( )
515
+ . into_par_iter ( )
512
516
. map ( |( size, vec_file_entry) | {
513
517
let mut hashmap_with_hash: BTreeMap < String , Vec < FileEntry > > = Default :: default ( ) ;
514
518
let mut errors: Vec < String > = Vec :: new ( ) ;
@@ -519,15 +523,16 @@ impl DuplicateFinder {
519
523
check_was_stopped. store ( true , Ordering :: Relaxed ) ;
520
524
return None ;
521
525
}
522
- for file_entry in vec_file_entry {
523
- match hash_calculation ( & mut buffer, file_entry, & check_type, 0 ) {
526
+ for mut file_entry in vec_file_entry {
527
+ match hash_calculation ( & mut buffer, & file_entry, & check_type, 0 ) {
524
528
Ok ( hash_string) => {
525
- hashmap_with_hash. entry ( hash_string. clone ( ) ) . or_default ( ) . push ( file_entry. clone ( ) ) ;
529
+ file_entry. hash = hash_string. clone ( ) ;
530
+ hashmap_with_hash. entry ( hash_string. clone ( ) ) . or_default ( ) . push ( file_entry) ;
526
531
}
527
532
Err ( s) => errors. push ( s) ,
528
533
}
529
534
}
530
- Some ( ( * size, hashmap_with_hash, errors) )
535
+ Some ( ( size, hashmap_with_hash, errors) )
531
536
} )
532
537
. while_some ( )
533
538
. collect ( ) ;
@@ -581,11 +586,14 @@ impl DuplicateFinder {
581
586
debug ! ( "full_hashing_load_cache_at_start - started diff between loaded and prechecked files" ) ;
582
587
for ( size, mut vec_file_entry) in pre_checked_map {
583
588
if let Some ( cached_vec_file_entry) = loaded_hash_map. get ( & size) {
584
- // TODO maybe hashset is not needed when using < 4 elements
585
- let cached_path_entries = cached_vec_file_entry. iter ( ) . map ( |e| & e. path ) . collect :: < HashSet < _ > > ( ) ;
589
+ // TODO: the HashMap may be unnecessary when there are fewer than 4 elements
590
+ let mut cached_path_entries: HashMap < & Path , FileEntry > = HashMap :: new ( ) ;
591
+ for file_entry in cached_vec_file_entry {
592
+ cached_path_entries. insert ( & file_entry. path , file_entry. clone ( ) ) ;
593
+ }
586
594
for file_entry in vec_file_entry {
587
- if cached_path_entries. contains ( & file_entry. path ) {
588
- records_already_cached. entry ( size) . or_default ( ) . push ( file_entry ) ;
595
+ if let Some ( cached_file_entry ) = cached_path_entries. remove ( file_entry. path . as_path ( ) ) {
596
+ records_already_cached. entry ( size) . or_default ( ) . push ( cached_file_entry ) ;
589
597
} else {
590
598
non_cached_files_to_check. entry ( size) . or_default ( ) . push ( file_entry) ;
591
599
}
0 commit comments