Skip to content

feat(bindings/python): Add start_after support for list #6054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Apr 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bindings/python/python/opendal/__base.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"""
> DO NOT EDIT IT MANUALLY <

This file is generated by opendal/dev/generate/python.rs.
This file is generated by opendal/dev/src/generate/python.rs.
`opendal.__base` doesn't exist.
"""

Expand Down Expand Up @@ -485,6 +485,7 @@ class _Base:
access_key_id: str = ...,
secret_access_key: str = ...,
bucket: str = ...,
enable_versioning: _bool = ...,
) -> None: ...

@overload
Expand Down Expand Up @@ -622,6 +623,7 @@ class _Base:
checksum_algorithm: str = ...,
disable_write_with_if_match: _bool = ...,
enable_write_with_append: _bool = ...,
disable_list_objects_v2: _bool = ...,
) -> None: ...

@overload
Expand Down
28 changes: 18 additions & 10 deletions bindings/python/python/opendal/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,18 @@ class Operator(_Base):
Returns:
True if the object exists, False otherwise.
"""
def list(self, path: PathBuf) -> Iterable[Entry]:
def list(self, path: PathBuf, *, start_after: str | None = None) -> Iterable[Entry]:
"""List the objects at the given path.

Args:
path (str|Path): The path to the directory.
start_after (str | None): If set, only entries whose path sorts strictly after this key are returned.

Returns:
An iterable of entries representing the objects in the directory.
"""
def scan(self, path: PathBuf) -> Iterable[Entry]:
"""Scan the objects at the given path.
"""Scan the objects at the given path recursively.

Args:
path (str|Path): The path to the directory.
Expand Down Expand Up @@ -282,19 +283,28 @@ class AsyncOperator(_Base):
Returns:
True if the object exists, False otherwise.
"""
async def list(self, path: PathBuf) -> AsyncIterable[Entry]:
async def list(
self, path: PathBuf, *, start_after: str | None = None
) -> AsyncIterable[Entry]:
"""List the objects at the given path.

Args:
path (str|Path): The path to the directory.
start_after (str | None): If set, only entries whose path sorts strictly after this key are returned.

Returns:
An async iterable of entries representing the objects in the directory.
"""
async def scan(self, path: PathBuf) -> AsyncIterable[Entry]: ...
async def presign_stat(
self, path: PathBuf, expire_second: int
) -> PresignedRequest:
async def scan(self, path: PathBuf) -> AsyncIterable[Entry]:
"""Scan the objects at the given path recursively.

Args:
path (str|Path): The path to the directory.

Returns:
An async iterable of entries representing the objects in the directory.
"""
async def presign_stat(self, path: PathBuf, expire_second: int) -> PresignedRequest:
"""Generate a presigned URL for stat operation.

Args:
Expand All @@ -304,9 +314,7 @@ class AsyncOperator(_Base):
Returns:
A presigned request object.
"""
async def presign_read(
self, path: PathBuf, expire_second: int
) -> PresignedRequest:
async def presign_read(self, path: PathBuf, expire_second: int) -> PresignedRequest:
"""Generate a presigned URL for read operation.

Args:
Expand Down
30 changes: 21 additions & 9 deletions bindings/python/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,14 @@ impl Operator {
}

/// List the entries under the given path.
pub fn list(&self, path: PathBuf) -> PyResult<BlockingLister> {
#[pyo3(signature = (path, *, start_after=None))]
pub fn list(&self, path: PathBuf, start_after: Option<String>) -> PyResult<BlockingLister> {
let path = path.to_string_lossy().to_string();
let l = self.core.lister(&path).map_err(format_pyerr)?;
let mut builder = self.core.lister_with(&path);
if let Some(start_after) = start_after {
builder = builder.start_after(&start_after);
}
let l = builder.call().map_err(format_pyerr)?;
Ok(BlockingLister::new(l))
}

Expand Down Expand Up @@ -503,11 +508,21 @@ impl AsyncOperator {
}

/// List the entries under the given path.
pub fn list<'p>(&'p self, py: Python<'p>, path: PathBuf) -> PyResult<Bound<'p, PyAny>> {
#[pyo3(signature = (path, *, start_after=None))]
pub fn list<'p>(
&'p self,
py: Python<'p>,
path: PathBuf,
start_after: Option<String>,
) -> PyResult<Bound<'p, PyAny>> {
let this = self.core.clone();
let path = path.to_string_lossy().to_string();
future_into_py(py, async move {
let lister = this.lister(&path).await.map_err(format_pyerr)?;
let mut builder = this.lister_with(&path);
if let Some(start_after) = start_after {
builder = builder.start_after(&start_after);
}
let lister = builder.await.map_err(format_pyerr)?;
let pylister = Python::with_gil(|py| AsyncLister::new(lister).into_py_any(py))?;

Ok(pylister)
Expand All @@ -519,11 +534,8 @@ impl AsyncOperator {
let this = self.core.clone();
let path = path.to_string_lossy().to_string();
future_into_py(py, async move {
let lister = this
.lister_with(&path)
.recursive(true)
.await
.map_err(format_pyerr)?;
let builder = this.lister_with(&path).recursive(true);
let lister = builder.await.map_err(format_pyerr)?;
let pylister: PyObject =
Python::with_gil(|py| AsyncLister::new(lister).into_py_any(py))?;
Ok(pylister)
Expand Down
63 changes: 63 additions & 0 deletions bindings/python/tests/test_async_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import asyncio
from uuid import uuid4

import pytest


@pytest.mark.asyncio
@pytest.mark.need_capability("read", "write", "copy", "list", "list_with_start_after")
async def test_async_list_with_start_after(service_name, operator, async_operator):
    """Check `AsyncOperator.list` with and without the `start_after` keyword.

    Five files are written into a fresh, uniquely named directory. A plain
    listing must return the directory entry plus all five files; a listing
    with `start_after` set to the third file must return only the files whose
    paths sort strictly after it. The directory is removed afterwards even if
    an assertion fails, so a failing run does not leave data on the service.
    """
    test_dir = f"test_async_list_dir_{uuid4()}/"
    await async_operator.create_dir(test_dir)

    try:
        # 1. Prepare data
        files_to_create = [f"{test_dir}file_{i}" for i in range(5)]
        # Create files concurrently
        await asyncio.gather(
            *(async_operator.write(f, b"test_content") for f in files_to_create)
        )

        # 2. Test basic list
        entries = []
        async for entry in await async_operator.list(test_dir):
            entries.append(entry.path)
        entries.sort()  # Ensure order for comparison
        expected_files = sorted([test_dir, *files_to_create])
        assert entries == expected_files, (
            f"Basic list failed. Expected {expected_files}, got {entries}"
        )

        # 3. Test list with start_after
        start_after_file = files_to_create[2]  # e.g., test_dir/file_2
        entries_after = []
        # Note: start_after expects the *full path* relative to the operator root
        async for entry in await async_operator.list(
            test_dir, start_after=start_after_file
        ):
            entries_after.append(entry.path)
        entries_after.sort()  # Ensure order

        # Expected files are those lexicographically after start_after_file
        expected_files_after = sorted(
            [f for f in files_to_create if f > start_after_file]
        )
        assert entries_after == expected_files_after, (
            f"Expected {expected_files_after} after {start_after_file}, "
            f"got {entries_after}"
        )
    finally:
        # 4. Cleanup (always runs, so a failed assertion does not leak test data)
        await async_operator.remove_all(test_dir)
59 changes: 59 additions & 0 deletions bindings/python/tests/test_sync_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from uuid import uuid4

import pytest


@pytest.mark.need_capability("read", "write", "copy", "list", "list_with_start_after")
def test_sync_list_with_start_after(service_name, operator, async_operator):
    """Check `Operator.list` with and without the `start_after` keyword.

    Five files are written into a fresh, uniquely named directory. A plain
    listing must return the directory entry plus all five files; a listing
    with `start_after` set to the third file must return only the files whose
    paths sort strictly after it. The directory is removed afterwards even if
    an assertion fails, so a failing run does not leave data on the service.
    """
    test_dir = f"test_sync_list_dir_{uuid4()}/"
    operator.create_dir(test_dir)

    try:
        # 1. Prepare data
        files_to_create = [f"{test_dir}file_{i}" for i in range(5)]
        for f in files_to_create:
            operator.write(f, b"test_content")

        # 2. Test basic list
        entries = []
        for entry in operator.list(test_dir):
            entries.append(entry.path)
        entries.sort()  # Ensure order for comparison
        expected_files = sorted([test_dir, *files_to_create])
        assert entries == expected_files, (
            f"Basic list failed. Expected {expected_files}, got {entries}"
        )

        # 3. Test list with start_after
        start_after_file = files_to_create[2]  # e.g., test_dir/file_2
        entries_after = []
        # Note: start_after expects the *full path* relative to the operator root
        for entry in operator.list(test_dir, start_after=start_after_file):
            entries_after.append(entry.path)
        entries_after.sort()  # Ensure order

        # Expected files are those lexicographically after start_after_file
        expected_files_after = sorted(
            [f for f in files_to_create if f > start_after_file]
        )
        assert entries_after == expected_files_after, (
            f"Expected {expected_files_after} after {start_after_file}, "
            f"got {entries_after}"
        )
    finally:
        # 4. Cleanup (always runs, so a failed assertion does not leak test data)
        operator.remove_all(test_dir)
2 changes: 1 addition & 1 deletion dev/src/generate/python.j2
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"""
> DO NOT EDIT IT MANUALLY <

This file is generated by opendal/dev/generate/python.rs.
This file is generated by opendal/dev/src/generate/python.rs.
`opendal.__base` doesn't exist.
"""

Expand Down
Loading