diff --git a/.env.example b/.env.example index 58577aaf293b..4057e64ecb91 100644 --- a/.env.example +++ b/.env.example @@ -120,3 +120,6 @@ OPENDAL_WASABI_SECRET_ACCESS_KEY= OPENDAL_REDB_TEST=false OPENDAL_REDB_DATADIR=/tmp/redb OPENDAL_REDB_TABLE=redb-table +# cacache +OPENDAL_CACACHE_TEST=false +OPENDAL_CACACHE_DATADIR=/tmp/opendal/cacache/ diff --git a/.github/workflows/service_test_cacache.yml b/.github/workflows/service_test_cacache.yml new file mode 100644 index 000000000000..84c7ead1b3f2 --- /dev/null +++ b/.github/workflows/service_test_cacache.yml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Service Test Cacache + +on: + push: + branches: + - main + pull_request: + branches: + - main + paths: + - "core/src/**" + - "core/tests/**" + - "!core/src/docs/**" + - "!core/src/services/**" + - "core/src/services/cacache/**" + - ".github/workflows/service_test_cacache.yml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + +jobs: + cacache: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Rust toolchain + uses: ./.github/actions/setup + - name: Test cacache + shell: bash + working-directory: core + run: cargo test cacache --features services-cacache -j=1 + env: + RUST_BACKTRACE: full + RUST_LOG: debug + OPENDAL_CACACHE_TEST: on + OPENDAL_CACACHE_DATADIR: /tmp/opendal/cacache/ diff --git a/Cargo.lock b/Cargo.lock index 48ebdc80e0c6..3bec85b6f921 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -567,6 +567,33 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cacache" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11e3f7fcc57143528b55ff07ce71b4608d674d2929c2365f768c6c5bcaaa7a17" +dependencies = [ + "digest", + "either", + "futures", + "hex", + "libc", + "memmap2", + "miette", + "reflink", + "serde", + "serde_derive", + "serde_json", + "sha1", + "sha2", + "ssri", + "tempfile", + "thiserror", + "tokio", + "tokio-stream", + "walkdir", +] + [[package]] name = "camino" version = "1.1.4" @@ -2057,9 +2084,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.140" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libloading" @@ -2297,6 +2324,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.8.0" @@ -2328,6 +2364,29 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "miette" +version = "5.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a236ff270093b0b67451bc50a509bd1bad302cb1d3c7d37d5efe931238581fa9" +dependencies = [ + "miette-derive", + "once_cell", + "thiserror", + "unicode-width", +] + +[[package]] +name = "miette-derive" +version = "5.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4901771e1d44ddb37964565c654a3223ba41a594d02b8da471cc4464912b5cfa" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + [[package]] name = "mime" version = "0.3.17" @@ -2755,6 +2814,7 @@ dependencies = [ "base64 0.21.0", "bb8", "bytes", + "cacache", "chrono", "criterion", "dashmap", @@ -3887,6 +3947,16 @@ dependencies = [ "thiserror", ] +[[package]] +name = "reflink" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc585ec28b565b4c28977ce8363a6636cedc280351ba25a7915f6c9f37f68cbe" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "regex" version = "1.7.1" @@ -4322,6 +4392,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha-1" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha1" version = "0.10.5" @@ -4537,6 +4618,23 @@ dependencies = [ "serde", ] +[[package]] +name = "ssri" +version = "9.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b5327a6eb28e137e180380169adeae3ac6128438ca1e8a8dc80118f3d1812cbd" +dependencies = [ + "base64 0.21.0", + "digest", + "hex", + "miette", + "serde", + "sha-1", + "sha2", + "thiserror", + "xxhash-rust", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -4844,6 +4942,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.8" @@ -5620,6 +5729,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "xxhash-rust" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "735a71d46c4d68d71d4b24d03fdc2b98e38cea81730595801db779c04fe80d70" + [[package]] name = "yansi" version = "0.5.1" diff --git a/README.md b/README.md index e17728c07c5d..c8f00006d2e3 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ Major components of the project include:
Key-Value Storage Service (like rocksdb, sled) +- cacache: [cacache](https://crates.io/crates/cacache) backend - dashmap: [dashmap](https://github.com/xacrimon/dashmap) backend - memory: In memory backend - redis: [Redis](https://redis.io/) services diff --git a/core/Cargo.toml b/core/Cargo.toml index a0b816226dd6..eb9acbf7492c 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -105,6 +105,7 @@ services-azdfs = [ "reqsign?/services-azblob", "reqsign?/reqwest_request", ] +services-cacache = ["dep:cacache"] services-cos = [ "dep:reqsign", "reqsign?/services-tencent", @@ -180,6 +181,7 @@ backon = "0.4.0" base64 = "0.21" bb8 = { version = "0.8", optional = true } bytes = "1.2" +cacache = { version = "11.6", default-features = false, features = ["tokio-runtime", "mmap"], optional = true } chrono = "0.4.24" dashmap = { version = "5.4", optional = true } dirs = { version = "5.0.1", optional = true } diff --git a/core/README.md b/core/README.md index 625c7c010e9b..1ab16163f601 100644 --- a/core/README.md +++ b/core/README.md @@ -19,6 +19,7 @@ - [azblob](https://docs.rs/opendal/latest/opendal/services/struct.Azblob.html): [Azure Storage Blob](https://azure.microsoft.com/en-us/services/storage/blobs/) services. - [azdfs](https://docs.rs/opendal/latest/opendal/services/struct.Azdfs.html): [Azure Data Lake Storage Gen2](https://azure.microsoft.com/en-us/products/storage/data-lake-storage/) services. (As known as [abfs](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-abfs-driver)) +- [cacache](https://docs.rs/opendal/latest/opendal/services/struct.Cacache.html): [cacache](https://crates.io/crates/cacache) backend support. - [dashmap](https://docs.rs/opendal/latest/opendal/services/struct.Dashmap.html): [dashmap](https://github.com/xacrimon/dashmap) backend support. - [fs](https://docs.rs/opendal/latest/opendal/services/struct.Fs.html): POSIX alike file system. 
- [ftp](https://docs.rs/opendal/latest/opendal/services/struct.Ftp.html): FTP and FTPS support. diff --git a/core/src/services/cacache/backend.rs b/core/src/services/cacache/backend.rs new file mode 100644 index 000000000000..4fc47f15e948 --- /dev/null +++ b/core/src/services/cacache/backend.rs @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::str; + +use async_trait::async_trait; +use cacache; + +use crate::raw::adapters::kv; +use crate::Builder; +use crate::Error; +use crate::ErrorKind; +use crate::Scheme; +use crate::*; + +/// cacache service support. +#[doc = include_str!("docs.md")] +#[derive(Default)] +pub struct CacacheBuilder { + /// The path to the cacache data directory. + datadir: Option, +} + +impl CacacheBuilder { + /// Set the path to the cacache data directory. It will be created if it does not exist. 
+ pub fn datadir(&mut self, path: &str) -> &mut Self { + self.datadir = Some(path.into()); + self + } +} + +impl Builder for CacacheBuilder { + const SCHEME: Scheme = Scheme::Cacache; + type Accessor = CacacheBackend; + + fn from_map(map: HashMap) -> Self { + let mut builder = CacacheBuilder::default(); + + map.get("datadir").map(|v| builder.datadir(v)); + + builder + } + + fn build(&mut self) -> Result { + let datadir_path = self.datadir.take().ok_or_else(|| { + Error::new(ErrorKind::ConfigInvalid, "datadir is required but not set") + .with_context("service", Scheme::Cacache) + })?; + + Ok(CacacheBackend::new(Adapter { + datadir: datadir_path, + })) + } +} + +/// Backend for cacache services. +pub type CacacheBackend = kv::Backend; + +#[derive(Clone)] +pub struct Adapter { + datadir: String, +} + +impl Debug for Adapter { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut ds = f.debug_struct("Adapter"); + ds.field("path", &self.datadir); + ds.finish() + } +} + +#[async_trait] +impl kv::Adapter for Adapter { + fn metadata(&self) -> kv::Metadata { + kv::Metadata::new( + Scheme::Cacache, + &self.datadir, + Capability { + read: true, + write: true, + delete: true, + blocking: true, + ..Default::default() + }, + ) + } + + async fn get(&self, path: &str) -> Result>> { + Ok(Some( + cacache::read(&self.datadir, path) + .await + .map_err(parse_error)?, + )) + } + + fn blocking_get(&self, path: &str) -> Result>> { + Ok(Some( + cacache::read_sync(&self.datadir, path).map_err(parse_error)?, + )) + } + + async fn set(&self, path: &str, value: &[u8]) -> Result<()> { + cacache::write(&self.datadir, path, value) + .await + .map_err(parse_error)?; + + Ok(()) + } + + fn blocking_set(&self, path: &str, value: &[u8]) -> Result<()> { + cacache::write_sync(&self.datadir, path, value).map_err(parse_error)?; + + Ok(()) + } + + async fn delete(&self, path: &str) -> Result<()> { + cacache::remove(&self.datadir, path) + .await + .map_err(parse_error)?; + + Ok(()) + } + + 
fn blocking_delete(&self, path: &str) -> Result<()> { + cacache::remove_sync(&self.datadir, path).map_err(parse_error)?; + + Ok(()) + } +} + +fn parse_error(err: cacache::Error) -> Error { + let kind = match err { + cacache::Error::EntryNotFound(_, _) => ErrorKind::NotFound, + _ => ErrorKind::Unexpected, + }; + + Error::new(kind, "error from cacache").set_source(err) +} diff --git a/core/src/services/cacache/docs.md b/core/src/services/cacache/docs.md new file mode 100644 index 000000000000..b8d3e4df1723 --- /dev/null +++ b/core/src/services/cacache/docs.md @@ -0,0 +1,40 @@ +## Capabilities + +This service can be used to: + +- [x] stat +- [x] read +- [x] write +- [x] create_dir +- [x] delete +- [ ] copy +- [ ] rename +- [ ] list +- [ ] ~~scan~~ +- [ ] ~~presign~~ +- [x] blocking + +## Configuration + +- `datadir`: Set the path to the cacache data directory + +You can refer to [`CacacheBuilder`]'s docs for more information + +## Example + +### Via Builder + +```rust +use anyhow::Result; +use opendal::services::Cacache; +use opendal::Operator; + +#[tokio::main] +async fn main() -> Result<()> { + let mut builder = Cacache::default(); + builder.datadir("/tmp/opendal/cacache"); + + let op: Operator = Operator::new(builder)?.finish(); + Ok(()) +} +``` diff --git a/core/src/services/cacache/mod.rs b/core/src/services/cacache/mod.rs new file mode 100644 index 000000000000..e24897ca6124 --- /dev/null +++ b/core/src/services/cacache/mod.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod backend; + +pub use backend::CacacheBuilder as Cacache; diff --git a/core/src/services/mod.rs b/core/src/services/mod.rs index 0ec89585477f..6c84160b8256 100644 --- a/core/src/services/mod.rs +++ b/core/src/services/mod.rs @@ -109,6 +109,11 @@ mod oss; #[cfg(feature = "services-oss")] pub use oss::Oss; +#[cfg(feature = "services-cacache")] +mod cacache; +#[cfg(feature = "services-cacache")] +pub use self::cacache::Cacache; + #[cfg(feature = "services-redis")] mod redis; #[cfg(feature = "services-redis")] diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 57a9545217f4..370bfeeca6c7 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -157,6 +157,8 @@ impl Operator { Scheme::Azblob => Self::from_map::(map)?.finish(), #[cfg(feature = "services-azdfs")] Scheme::Azdfs => Self::from_map::(map)?.finish(), + #[cfg(feature = "services-cacache")] + Scheme::Cacache => Self::from_map::(map)?.finish(), #[cfg(feature = "services-cos")] Scheme::Cos => Self::from_map::(map)?.finish(), #[cfg(feature = "services-dashmap")] diff --git a/core/src/types/scheme.rs b/core/src/types/scheme.rs index 553e31e1cc53..8a0facbe7e80 100644 --- a/core/src/types/scheme.rs +++ b/core/src/types/scheme.rs @@ -35,6 +35,8 @@ pub enum Scheme { Azblob, /// [azdfs][crate::services::Azdfs]: Azure Data Lake Storage Gen2. Azdfs, + /// [cacache][crate::services::Cacache]: cacache backend support. + Cacache, /// [cos][crate::services::Cos]: Tencent Cloud Object Storage services. 
Cos, /// [dashmap][crate::services::Dashmap]: dashmap backend support. @@ -132,6 +134,7 @@ impl FromStr for Scheme { match s.as_str() { "azblob" => Ok(Scheme::Azblob), "azdfs" => Ok(Scheme::Azdfs), + "cacache" => Ok(Scheme::Cacache), "cos" => Ok(Scheme::Cos), "dashmap" => Ok(Scheme::Dashmap), "fs" => Ok(Scheme::Fs), @@ -167,6 +170,7 @@ impl From for &'static str { match v { Scheme::Azblob => "azblob", Scheme::Azdfs => "azdfs", + Scheme::Cacache => "cacache", Scheme::Cos => "cos", Scheme::Dashmap => "dashmap", Scheme::Fs => "fs", diff --git a/core/tests/behavior/main.rs b/core/tests/behavior/main.rs index da3e220784a0..2ed195085031 100644 --- a/core/tests/behavior/main.rs +++ b/core/tests/behavior/main.rs @@ -98,6 +98,8 @@ fn main() -> anyhow::Result<()> { tests.extend(behavior_test::()); #[cfg(feature = "services-azdfs")] tests.extend(behavior_test::()); + #[cfg(feature = "services-cacache")] + tests.extend(behavior_test::()); #[cfg(feature = "services-cos")] tests.extend(behavior_test::()); #[cfg(feature = "services-dashmap")] diff --git a/website/src/components/HomepageFeatures/_feature_services.mdx b/website/src/components/HomepageFeatures/_feature_services.mdx index 8e824ebe0341..074311ba2587 100644 --- a/website/src/components/HomepageFeatures/_feature_services.mdx +++ b/website/src/components/HomepageFeatures/_feature_services.mdx @@ -46,6 +46,7 @@ Apache OpenDAL provides native support for all kinds for storage systems.
Key-Value Storage Service +- cacache: [cacache](https://crates.io/crates/cacache) backend - dashmap: [dashmap](https://github.com/xacrimon/dashmap) backend - memory: In memory backend - redis: [Redis](https://redis.io/) services