Skip to content

Commit ccc6e97

Browse files
committed
refactor: move archive extraction logic in its own module
1 parent 49e2084 commit ccc6e97

File tree

3 files changed

+93
-84
lines changed

3 files changed

+93
-84
lines changed

src/ops2deb/extracter.py

+91
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
import asyncio
2+
import bz2
3+
import gzip
4+
import io
5+
import shutil
6+
import tarfile
7+
from pathlib import Path
8+
9+
import unix_ar
10+
import zstandard
11+
12+
from ops2deb import logger
13+
from ops2deb.exceptions import Ops2debExtractError
14+
from ops2deb.utils import log_and_raise
15+
16+
17+
def _unpack_gz(file_path: str, extract_path: str) -> None:
    """Decompress a single gzip-compressed file into ``extract_path``.

    The output file is named after the archive with its last suffix removed
    (``Path(file_path).stem``), e.g. ``foo.txt.gz`` is written as ``foo.txt``.
    """
    target = Path(extract_path) / Path(file_path).stem
    # The destination is opened first, then the archive; both are closed on exit.
    with target.open("wb") as sink, gzip.open(file_path, "rb") as source:
        shutil.copyfileobj(source, sink)
22+
23+
24+
def _unpack_bz2(file_path: str, extract_path: str) -> None:
    """Decompress a single bzip2-compressed file into ``extract_path``.

    The output file is named after the archive with its last suffix removed
    (``Path(file_path).stem``), e.g. ``foo.txt.bz2`` is written as ``foo.txt``.
    """
    target = Path(extract_path) / Path(file_path).stem
    # The destination is opened first, then the archive; both are closed on exit.
    with target.open(mode="wb") as sink, bz2.open(file_path, "rb") as source:
        shutil.copyfileobj(source, sink)
29+
30+
31+
def _unpack_deb(file_path: str, extract_path: str) -> None:
    """Extract the tarballs contained in a Debian package.

    Every ``ar`` member whose name does not start with ``debian-binary`` is
    opened as a tar archive and extracted into a sub-directory of
    ``extract_path`` named after the part of the member name that precedes
    its first dot (``data.tar.xz`` would go to ``<extract_path>/data``).
    """
    ar_file = unix_ar.open(file_path)
    try:
        member_names = [info.name.decode("utf-8") for info in ar_file.infolist()]
        for member_name in member_names:
            if member_name.startswith("debian-binary"):
                continue
            tarball = ar_file.open(member_name)
            with tarfile.open(fileobj=tarball) as tar_file:
                tar_file.extractall(Path(extract_path) / member_name.split(".")[0])
    finally:
        # Release the handle on the .deb even when a member fails to extract.
        ar_file.close()
43+
44+
45+
def _unpack_zst(file_path: str, extract_path: str) -> None:
    """Decompress a single zstandard-compressed file into ``extract_path``.

    The output file is named after the archive with its last suffix removed
    (``Path(file_path).stem``).
    """
    destination = Path(extract_path) / Path(file_path).stem
    decompressor = zstandard.ZstdDecompressor()
    with open(file_path, "rb") as compressed:
        with destination.open("wb") as plain:
            # Decompress from the archive handle straight into the output file.
            decompressor.copy_stream(compressed, plain)
50+
51+
52+
def _unpack_tar_zst(file_path: str, extract_path: str) -> None:
    """Extract a zstandard-compressed tarball into ``extract_path``.

    The archive is first decompressed entirely into an in-memory buffer,
    which is then read back as a tar archive.
    """
    decompressor = zstandard.ZstdDecompressor()
    with open(file_path, "rb") as compressed:
        with io.BytesIO() as buffer:
            decompressor.copy_stream(compressed, buffer)
            # Rewind so tarfile reads the decompressed bytes from the start.
            buffer.seek(0)
            with tarfile.open(fileobj=buffer) as tar_file:
                tar_file.extractall(extract_path)
59+
60+
61+
# Extensions that an unpack format already handles. ``shutil`` raises
# ``RegistryError`` when an extension is registered a second time, which
# happens if this module is executed again or if the interpreter's own format
# table already claims one of the extensions below.
_registered_extensions = {
    extension
    for _, extensions, _ in shutil.get_unpack_formats()
    for extension in extensions
}

# Kept in the original order; ".tar.zst" precedes ".zst", presumably so the
# more specific suffix is matched first.
for _format_name, _format_extensions, _unpacker in (
    ("gz", [".gz"], _unpack_gz),
    ("bz2", [".bz2"], _unpack_bz2),
    ("deb", [".deb"], _unpack_deb),
    ("zsttar", [".tar.zst"], _unpack_tar_zst),
    ("zst", [".zst"], _unpack_zst),
):
    if _registered_extensions.isdisjoint(_format_extensions):
        shutil.register_unpack_format(_format_name, _format_extensions, _unpacker)
66+
67+
68+
def is_archive_format_supported(archive_path: Path) -> bool:
    """Return whether a registered ``shutil`` unpack format matches the file.

    Only the final path component is examined: the archive is supported when
    its name ends with one of the extensions reported by
    ``shutil.get_unpack_formats()``.

    Args:
        archive_path: Path of the candidate archive; it does not need to exist.

    Returns:
        ``True`` if some registered extension is a suffix of the file name,
        ``False`` otherwise.
    """
    file_name = archive_path.name
    return any(
        file_name.endswith(tuple(extensions))
        for _, extensions, _ in shutil.get_unpack_formats()
    )
74+
75+
76+
async def extract_archive(archive_path: Path, extract_path: Path) -> None:
    """Unpack ``archive_path`` into ``extract_path`` without blocking the loop.

    The archive is unpacked with ``shutil.unpack_archive`` in the event loop's
    default executor, into a sibling ``<extract_path>_tmp`` directory that is
    moved to ``extract_path`` once extraction has succeeded.

    Args:
        archive_path: Archive to unpack.
        extract_path: Final location of the extracted content.

    Raises:
        Ops2debExtractError: Passed to ``log_and_raise`` when unpacking fails
            (presumably raised by that helper; it is defined outside this
            module).
    """
    tmp_extract_path = f"{extract_path}_tmp"
    Path(tmp_extract_path).mkdir(exist_ok=True)
    logger.info(f"Extracting {archive_path.name}...")

    try:
        await asyncio.get_running_loop().run_in_executor(
            None, shutil.unpack_archive, archive_path, tmp_extract_path
        )
    except Exception as e:
        # Remove the partially extracted tree: because the directory is created
        # with exist_ok=True, leftovers would otherwise be mixed into the
        # result of a later attempt.
        shutil.rmtree(tmp_extract_path, ignore_errors=True)
        error = f"Failed to extract archive {archive_path}"
        if str(e):
            error += f" ({e})"
        log_and_raise(Ops2debExtractError(error))

    shutil.move(tmp_extract_path, extract_path)

src/ops2deb/fetcher.py

+2-84
Original file line number | Diff line number | Diff line change
@@ -1,80 +1,24 @@
11
import asyncio
2-
import bz2
3-
import gzip
42
import hashlib
5-
import io
63
import shutil
7-
import tarfile
84
from dataclasses import dataclass
95
from pathlib import Path
106
from typing import Sequence
117

128
import aiofiles
139
import httpx
14-
import unix_ar
15-
import zstandard
1610

1711
from ops2deb import logger
1812
from ops2deb.client import client_factory
19-
from ops2deb.exceptions import Ops2debError, Ops2debExtractError, Ops2debFetcherError
13+
from ops2deb.exceptions import Ops2debError, Ops2debFetcherError
14+
from ops2deb.extracter import extract_archive, is_archive_format_supported
2015
from ops2deb.lockfile import Lock
2116
from ops2deb.parser import Configuration
2217
from ops2deb.utils import log_and_raise, separate_results_from_errors
2318

2419
DEFAULT_CACHE_DIRECTORY = Path("/tmp/ops2deb_cache")
2520

2621

27-
def _unpack_gz(file_path: str, extract_path: str) -> None:
28-
output_path = Path(extract_path) / Path(file_path).stem
29-
with output_path.open("wb") as output:
30-
with gzip.open(file_path, "rb") as gz_archive:
31-
shutil.copyfileobj(gz_archive, output)
32-
33-
34-
def _unpack_bz2(file_path: str, extract_path: str) -> None:
35-
output_path = Path(extract_path) / Path(file_path).stem
36-
with output_path.open(mode="wb") as output:
37-
with bz2.open(file_path, "rb") as bz2_archive:
38-
shutil.copyfileobj(bz2_archive, output)
39-
40-
41-
def _unpack_deb(file_path: str, extract_path: str) -> None:
42-
ar_file = unix_ar.open(file_path)
43-
file_names = [info.name.decode("utf-8") for info in ar_file.infolist()]
44-
for file_name in file_names:
45-
if file_name.startswith("debian-binary"):
46-
continue
47-
tarball = ar_file.open(file_name)
48-
tar_file = tarfile.open(fileobj=tarball)
49-
try:
50-
tar_file.extractall(Path(extract_path) / file_name.split(".")[0])
51-
finally:
52-
tar_file.close()
53-
54-
55-
def _unpack_zst(file_path: str, extract_path: str) -> None:
56-
output_path = Path(extract_path) / Path(file_path).stem
57-
dctx = zstandard.ZstdDecompressor()
58-
with open(file_path, "rb") as ifh, output_path.open("wb") as ofh:
59-
dctx.copy_stream(ifh, ofh)
60-
61-
62-
def _unpack_tar_zst(file_path: str, extract_path: str) -> None:
63-
dctx = zstandard.ZstdDecompressor()
64-
with open(file_path, "rb") as ifh, io.BytesIO() as ofh:
65-
dctx.copy_stream(ifh, ofh)
66-
ofh.seek(0)
67-
with tarfile.open(fileobj=ofh) as tar_file:
68-
tar_file.extractall(extract_path)
69-
70-
71-
shutil.register_unpack_format("gz", [".gz"], _unpack_gz)
72-
shutil.register_unpack_format("bz2", [".bz2"], _unpack_bz2)
73-
shutil.register_unpack_format("deb", [".deb"], _unpack_deb)
74-
shutil.register_unpack_format("zsttar", [".tar.zst"], _unpack_tar_zst)
75-
shutil.register_unpack_format("zst", [".zst"], _unpack_zst)
76-
77-
7822
async def _download_file(url: str, download_path: Path) -> None:
7923
tmp_path = f"{download_path}.part"
8024
logger.info(f"Downloading {download_path.name}...")
@@ -106,32 +50,6 @@ async def _hash_file(file_path: Path) -> str:
10650
return sha256_hash.hexdigest()
10751

10852

109-
def is_archive_format_supported(archive_path: Path) -> bool:
110-
for name, extensions, _ in shutil.get_unpack_formats():
111-
for extension in extensions:
112-
if archive_path.name.endswith(extension):
113-
return True
114-
return False
115-
116-
117-
async def extract_archive(archive_path: Path, extract_path: Path) -> None:
118-
tmp_extract_path = f"{extract_path}_tmp"
119-
Path(tmp_extract_path).mkdir(exist_ok=True)
120-
logger.info(f"Extracting {archive_path.name}...")
121-
122-
try:
123-
await asyncio.get_running_loop().run_in_executor(
124-
None, shutil.unpack_archive, archive_path, tmp_extract_path
125-
)
126-
except Exception as e:
127-
error = f"Failed to extract archive {archive_path}"
128-
if str(e):
129-
error += f" ({e})"
130-
log_and_raise(Ops2debExtractError(error))
131-
132-
shutil.move(tmp_extract_path, extract_path)
133-
134-
13553
@dataclass
13654
class FetchResult:
13755
url: str
File renamed without changes.

0 commit comments

Comments
 (0)