Skip to content

Commit 51198c2

Browse files
authored
Optimize excluded items (#1152)
1 parent 306648a commit 51198c2

21 files changed

+358
-254
lines changed

Changelog.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,14 @@
1212
### Core
1313
- Using normal crossbeam channels instead of asyncio tokio channel - [#1102](https://github.com/qarmin/czkawka/pull/1102)
1414
- Fixed tool type when using progress of empty directories - [#1102](https://github.com/qarmin/czkawka/pull/1102)
15-
- Fixed missing json support in saving size and name - [#1102](https://github.com/qarmin/czkawka/pull/1102)
15+
- Fixed missing json support when saving size and name duplicate results - [#1102](https://github.com/qarmin/czkawka/pull/1102)
1616
- Fix cross-compiled debug windows build - [#1102](https://github.com/qarmin/czkawka/pull/1102)
1717
- Added bigger stack size by default(fixes stack overflow in some musl apps) - [#1102](https://github.com/qarmin/czkawka/pull/1102)
1818
- Added optional libraw dependency(better single-core performance and support more raw files) - [#1102](https://github.com/qarmin/czkawka/pull/1102)
19+
- Sped up wildcard checking and fixed incorrect recognition of long excluded items - [#1152](https://github.com/qarmin/czkawka/pull/1152)
20+
- Up to 10x speedup when searching for empty folders - [#1152](https://github.com/qarmin/czkawka/pull/1152)
21+
- Collecting files for scan can be a lot faster due to lazy file metadata gathering - [#1152](https://github.com/qarmin/czkawka/pull/1152)
22+
- Fixed recognizing not accessible folders as non-empty - [#1152](https://github.com/qarmin/czkawka/pull/1152)
1923

2024
## Version 6.1.0 - 15.10.2023r
2125
- BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure (all files need to be hashed again) - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1086](https://github.com/qarmin/czkawka/pull/1086)

czkawka_core/src/bad_extensions.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,9 @@ impl PrintResults for BadExtensions {
419419
writeln!(
420420
writer,
421421
"Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
422-
self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.items
422+
self.common_data.directories.included_directories,
423+
self.common_data.directories.excluded_directories,
424+
self.common_data.excluded_items.get_excluded_items()
423425
)?;
424426
writeln!(writer, "Found {} files with invalid extension.\n", self.information.number_of_files_with_bad_extension)?;
425427

czkawka_core/src/big_file.rs

+23-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::collections::BTreeMap;
22
use std::fs;
3-
use std::fs::{DirEntry, Metadata};
3+
use std::fs::DirEntry;
44
use std::io::Write;
55
use std::path::{Path, PathBuf};
66
use std::sync::atomic::{AtomicUsize, Ordering};
@@ -14,7 +14,7 @@ use rayon::prelude::*;
1414
use serde::{Deserialize, Serialize};
1515

1616
use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path};
17-
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
17+
use crate::common_dir_traversal::{common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
1818
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
1919
use crate::common_traits::{DebugPrint, PrintResults};
2020

@@ -68,7 +68,7 @@ impl BigFile {
6868

6969
#[fun_time(message = "look_for_big_files", level = "debug")]
7070
fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
71-
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
71+
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2);
7272
let mut old_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
7373

7474
// Add root folders for finding
@@ -99,22 +99,25 @@ impl BigFile {
9999

100100
// Check every sub folder/file/link etc.
101101
for entry in read_dir {
102-
let Some((entry_data, metadata)) = common_get_entry_data_metadata(&entry, &mut warnings, current_folder) else {
102+
let Ok(entry_data) = entry else {
103+
continue;
104+
};
105+
let Ok(file_type) = entry_data.file_type() else {
103106
continue;
104107
};
105108

106-
if metadata.is_dir() {
109+
if file_type.is_dir() {
107110
check_folder_children(
108111
&mut dir_result,
109112
&mut warnings,
110113
current_folder,
111-
entry_data,
114+
&entry_data,
112115
self.common_data.recursive_search,
113116
&self.common_data.directories,
114117
&self.common_data.excluded_items,
115118
);
116-
} else if metadata.is_file() {
117-
self.collect_file_entry(&atomic_counter, &metadata, entry_data, &mut fe_result, &mut warnings, current_folder);
119+
} else if file_type.is_file() {
120+
self.collect_file_entry(&atomic_counter, &entry_data, &mut fe_result, &mut warnings, current_folder);
118121
}
119122
}
120123
(dir_result, warnings, fe_result)
@@ -146,18 +149,13 @@ impl BigFile {
146149
pub fn collect_file_entry(
147150
&self,
148151
atomic_counter: &Arc<AtomicUsize>,
149-
metadata: &Metadata,
150152
entry_data: &DirEntry,
151153
fe_result: &mut Vec<(u64, FileEntry)>,
152154
warnings: &mut Vec<String>,
153155
current_folder: &Path,
154156
) {
155157
atomic_counter.fetch_add(1, Ordering::Relaxed);
156158

157-
if metadata.len() == 0 {
158-
return;
159-
}
160-
161159
let Some(file_name_lowercase) = get_lowercase_name(entry_data, warnings) else {
162160
return;
163161
};
@@ -171,10 +169,18 @@ impl BigFile {
171169
return;
172170
}
173171

172+
let Ok(metadata) = entry_data.metadata() else {
173+
return;
174+
};
175+
176+
if metadata.len() == 0 {
177+
return;
178+
}
179+
174180
let fe: FileEntry = FileEntry {
175181
path: current_file_name.clone(),
176182
size: metadata.len(),
177-
modified_date: get_modified_time(metadata, warnings, &current_file_name, false),
183+
modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
178184
};
179185

180186
fe_result.push((fe.size, fe));
@@ -253,7 +259,9 @@ impl PrintResults for BigFile {
253259
writeln!(
254260
writer,
255261
"Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
256-
self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.items
262+
self.common_data.directories.included_directories,
263+
self.common_data.directories.excluded_directories,
264+
self.common_data.excluded_items.get_excluded_items()
257265
)?;
258266

259267
if self.information.number_of_real_files != 0 {

czkawka_core/src/broken_files.rs

+20-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::collections::BTreeMap;
2-
use std::fs::{DirEntry, File, Metadata};
2+
use std::fs::{DirEntry, File};
33
use std::io::prelude::*;
44

55
use std::path::{Path, PathBuf};
@@ -22,7 +22,7 @@ use crate::common::{
2222
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
2323
};
2424
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
25-
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
25+
use crate::common_dir_traversal::{common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
2626
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
2727
use crate::common_traits::*;
2828

@@ -108,7 +108,7 @@ impl BrokenFiles {
108108

109109
#[fun_time(message = "check_files", level = "debug")]
110110
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
111-
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
111+
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2);
112112

113113
// Add root folders for finding
114114
for id in &self.common_data.directories.included_directories {
@@ -138,22 +138,25 @@ impl BrokenFiles {
138138

139139
// Check every sub folder/file/link etc.
140140
for entry in read_dir {
141-
let Some((entry_data, metadata)) = common_get_entry_data_metadata(&entry, &mut warnings, current_folder) else {
141+
let Ok(entry_data) = entry else {
142+
continue;
143+
};
144+
let Ok(file_type) = entry_data.file_type() else {
142145
continue;
143146
};
144147

145-
if metadata.is_dir() {
148+
if file_type.is_dir() {
146149
check_folder_children(
147150
&mut dir_result,
148151
&mut warnings,
149152
current_folder,
150-
entry_data,
153+
&entry_data,
151154
self.common_data.recursive_search,
152155
&self.common_data.directories,
153156
&self.common_data.excluded_items,
154157
);
155-
} else if metadata.is_file() {
156-
if let Some(file_entry) = self.get_file_entry(&metadata, &atomic_counter, entry_data, &mut warnings, current_folder) {
158+
} else if file_type.is_file() {
159+
if let Some(file_entry) = self.get_file_entry(&atomic_counter, &entry_data, &mut warnings, current_folder) {
157160
fe_result.push((file_entry.path.to_string_lossy().to_string(), file_entry));
158161
}
159162
}
@@ -180,14 +183,7 @@ impl BrokenFiles {
180183
true
181184
}
182185

183-
fn get_file_entry(
184-
&self,
185-
metadata: &Metadata,
186-
atomic_counter: &Arc<AtomicUsize>,
187-
entry_data: &DirEntry,
188-
warnings: &mut Vec<String>,
189-
current_folder: &Path,
190-
) -> Option<FileEntry> {
186+
fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>, current_folder: &Path) -> Option<FileEntry> {
191187
atomic_counter.fetch_add(1, Ordering::Relaxed);
192188

193189
let file_name_lowercase = get_lowercase_name(entry_data, warnings)?;
@@ -207,9 +203,13 @@ impl BrokenFiles {
207203
return None;
208204
}
209205

206+
let Ok(metadata) = entry_data.metadata() else {
207+
return None;
208+
};
209+
210210
let fe: FileEntry = FileEntry {
211211
path: current_file_name.clone(),
212-
modified_date: get_modified_time(metadata, warnings, &current_file_name, false),
212+
modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
213213
size: metadata.len(),
214214
type_of_file,
215215
error_string: String::new(),
@@ -464,7 +464,9 @@ impl PrintResults for BrokenFiles {
464464
writeln!(
465465
writer,
466466
"Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
467-
self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.items
467+
self.common_data.directories.included_directories,
468+
self.common_data.directories.excluded_directories,
469+
self.common_data.excluded_items.get_excluded_items()
468470
)?;
469471

470472
if !self.broken_files.is_empty() {

0 commit comments

Comments
 (0)