Skip to content

Commit 55b2744

Browse files
authored
Simplify, fix and improve similar images algorithm (#983)
* Random changer * CD * A1 * Almost? * Nein * Heif * Tests that needs to be fixed * Fixed test * Tests * Fixed image counting in reference folders * Lock * Catch possible more bugs in pdf * Find ever more bugs
1 parent 04a91ae commit 55b2744

12 files changed

+736
-657
lines changed

Cargo.lock

+254-376
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Changelog.md

+12
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
## Version 5.2.0 - ?
2+
- Add finding similar audio files by content - [#970](https://github.com/qarmin/czkawka/pull/970)
3+
- Allow to find duplicates by name/size at once - [#956](https://github.com/qarmin/czkawka/pull/956)
4+
- Fixed bug where cache for music tags did not work - [#970](https://github.com/qarmin/czkawka/pull/970)
5+
- Allow to set number of threads from CLI - [#972](https://github.com/qarmin/czkawka/pull/972)
6+
- Fix problem with invalid item sorting in bad extensions mode - [#972](https://github.com/qarmin/czkawka/pull/972)
7+
- Big refactor/cleaning of code - [#956](https://github.com/qarmin/czkawka/pull/956)/[#970](https://github.com/qarmin/czkawka/pull/970)/[#972](https://github.com/qarmin/czkawka/pull/972)
8+
- Use builtin gtk webp loader for previews - [#923](https://github.com/qarmin/czkawka/pull/923)
9+
- Fixed docker build - [#947](https://github.com/qarmin/czkawka/pull/947)
10+
- Restore snap builds broken since GTK 4 port - [#965](https://github.com/qarmin/czkawka/pull/965)
11+
- Instructions on how to build native ARM64 binaries on Mac - [#945](https://github.com/qarmin/czkawka/pull/945)/[#971](https://github.com/qarmin/czkawka/pull/971)
12+
113
## Version 5.1.0 - 19.02.2023r
214
- Added sort button - [#894](https://github.com/qarmin/czkawka/pull/894)
315
- Allow to set number of thread used to scan - [#839](https://github.com/qarmin/czkawka/pull/839)

czkawka_cli/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ homepage = "https://github.com/qarmin/czkawka"
1010
repository = "https://github.com/qarmin/czkawka"
1111

1212
[dependencies]
13-
clap = { version = "4.2", features = ["derive"] }
13+
clap = { version = "4.3", features = ["derive"] }
1414

1515
# For enum types
16-
image_hasher = "1.1"
16+
image_hasher = "1.2"
1717

1818
[dependencies.czkawka_core]
1919
path = "../czkawka_core"

czkawka_core/Cargo.toml

+11-10
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ crossbeam-channel = "0.5"
1919
directories-next = "2.0"
2020

2121
# Needed by similar images
22-
image_hasher = "1.1"
22+
image_hasher = "1.2"
2323
bk-tree = "0.5"
2424
image = "0.24"
2525
hamming = "0.1"
2626

2727
# Needed by same music
28-
bitflags = "2.2"
29-
lofty = "0.12"
28+
bitflags = "2.3"
29+
lofty = "0.14"
3030

3131
# Futures - needed by async progress sender
3232
futures = "0.3.28"
@@ -41,11 +41,11 @@ rusty-chromaprint = "0.1"
4141
symphonia = { version = "0.5", features = ["all"] }
4242

4343
# Hashes for duplicate files
44-
blake3 = "1.3"
44+
blake3 = "1.4"
4545
crc32fast = "1.3"
4646
xxhash-rust = { version = "0.8", features = ["xxh3"] }
4747

48-
tempfile = "3.5"
48+
tempfile = "3.6"
4949

5050
# Video Duplicates
5151
vid_dup_finder_lib = "0.1"
@@ -59,8 +59,8 @@ serde_json = "1.0"
5959
# Language
6060
i18n-embed = { version = "0.13", features = ["fluent-system", "desktop-requester"] }
6161
i18n-embed-fl = "0.6"
62-
rust-embed = "6.6"
63-
once_cell = "1.17"
62+
rust-embed = "6.7"
63+
once_cell = "1.18"
6464

6565
# Raw image files
6666
rawloader = "0.37"
@@ -73,11 +73,12 @@ infer = "0.13"
7373
num_cpus = "1.15"
7474

7575
# Heif/Heic
76-
libheif-rs = { version = "0.18.0", optional = true } # Do not upgrade now, since Ubuntu 22.04 not works with newer version
76+
libheif-rs = { version = "=0.18.0", optional = true } # Do not upgrade for now, since Ubuntu 22.04 does not work with newer versions
77+
libheif-sys = { version = "=1.14.2", optional = true } # 1.14.3 breaks compilation on Ubuntu 22.04
7778
anyhow = { version = "1.0" }
7879

79-
state = "0.5"
80+
state = "0.6"
8081

8182
[features]
8283
default = []
83-
heif = ["dep:libheif-rs"]
84+
heif = ["dep:libheif-rs", "dep:libheif-sys"]

czkawka_core/src/broken_files.rs

+32-15
Original file line numberDiff line numberDiff line change
@@ -372,23 +372,27 @@ impl BrokenFiles {
372372

373373
let mut file_entry_clone = file_entry.clone();
374374
let result = panic::catch_unwind(|| {
375-
if let Err(e) = FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
376-
if let PdfError::Io { .. } = e {
377-
return None;
378-
}
379-
380-
let mut error_string = e.to_string();
381-
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
382-
if error_string.starts_with("Try at") {
383-
if let Some(start_index) = error_string.find("/pdf-") {
384-
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
375+
match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
376+
Ok(file) => {
377+
for idx in 0..file.num_pages() {
378+
if let Err(e) = file.get_page(idx) {
379+
let err = validate_pdf_error(&mut file_entry, e);
380+
if let PdfError::InvalidPassword = err {
381+
return None;
382+
} else {
383+
break;
384+
}
385+
}
385386
}
386387
}
387-
388-
file_entry.error_string = error_string;
389-
let error = unpack_pdf_error(e);
390-
if let PdfError::InvalidPassword = error {
391-
return None;
388+
Err(e) => {
389+
if let PdfError::Io { .. } = e {
390+
return None;
391+
}
392+
let err = validate_pdf_error(&mut file_entry, e);
393+
if let PdfError::InvalidPassword = err {
394+
return None;
395+
}
392396
}
393397
}
394398
Some(file_entry)
@@ -708,3 +712,16 @@ fn unpack_pdf_error(e: PdfError) -> PdfError {
708712
e
709713
}
710714
}
715+
716+
fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError {
717+
let mut error_string = e.to_string();
718+
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
719+
if error_string.starts_with("Try at") {
720+
if let Some(start_index) = error_string.find("/pdf-") {
721+
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
722+
}
723+
}
724+
725+
file_entry.error_string = error_string;
726+
unpack_pdf_error(e)
727+
}

czkawka_core/src/common.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::common_directory::Directories;
2424
use crate::common_items::ExcludedItems;
2525
use crate::common_traits::ResultEntry;
2626

27-
static NUMBER_OF_THREADS: state::Storage<usize> = state::Storage::new();
27+
static NUMBER_OF_THREADS: state::InitCell<usize> = state::InitCell::new();
2828

2929
pub fn get_number_of_threads() -> usize {
3030
let data = NUMBER_OF_THREADS.get();

czkawka_core/src/common_dir_traversal.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ where
473473
}
474474
}
475475
if counter > 0 {
476-
// Do not increase counter one by one in threads, because usually it
476+
// Increase the counter in one batch, because updating an atomic value many times may be slow
477477
atomic_counter.fetch_add(counter, Ordering::Relaxed);
478478
}
479479
(dir_result, warnings, fe_result, set_as_not_empty_folder_list, folder_entries_list)

czkawka_core/src/common_directory.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ pub struct Directories {
1313
pub excluded_directories: Vec<PathBuf>,
1414
pub included_directories: Vec<PathBuf>,
1515
pub reference_directories: Vec<PathBuf>,
16-
exclude_other_filesystems: Option<bool>,
16+
pub exclude_other_filesystems: Option<bool>,
1717
#[cfg(target_family = "unix")]
18-
included_dev_ids: Vec<u64>,
18+
pub included_dev_ids: Vec<u64>,
1919
}
2020

2121
impl Directories {

0 commit comments

Comments
 (0)