Skip to content

Commit bf44da4

Browse files
committed
add to anilist script
1 parent 1f0f06a commit bf44da4

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
tags*
22
*.pdf
3+
*.xml
34

45
# Byte-compiled / optimized / DLL files
56
__pycache__/

scripts/to_anilist.py

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
A script to convert my XML export into chunks, so that
5+
it can be imported to anilist without cloudflare timing out
6+
"""
7+
8+
from functools import partial
9+
from typing import Tuple
10+
from pathlib import Path
11+
12+
import click
13+
import lxml.etree as ET
14+
from malexport.list_type import ListType
15+
from malexport.paths import LocalDir
16+
from malexport.exporter import ExportDownloader
17+
18+
# tags of per-entry child elements which are stripped before chunking
REMOVE_ATTRS = {"my_tags"}
19+
20+
21+
def remove_attrs(
    xml_file: Path, media_type: ListType, filter_activity: bool
) -> Tuple[str, int]:
    """
    Parse an XML export, strip unwanted data and serialize what is left.

    The ``myinfo`` element (when present) is removed from the root, and every
    child of an entry whose tag is listed in ``REMOVE_ATTRS`` is removed.

    If ``filter_activity`` is set, an entry is dropped unless it has at least
    one of: a non-zero score, a start date (one that is non-empty and does not
    start with "0000"), a status of "Completed", or non-zero episode/chapter
    progress.

    Returns a tuple of the serialized XML (as a str) and the number of
    ``media_type`` entries remaining in it.
    """

    def field_text(entry, tag: str) -> str:
        # A missing element or an element without text both yield "".
        # Calling str() on a None text would produce the literal "None",
        # which would then be mistaken for real data.
        node = entry.find(tag)
        if node is None or node.text is None:
            return ""
        return str(node.text).strip()

    tree = ET.parse(str(xml_file))
    root = tree.getroot()

    # find() returns None when there is no myinfo element; only remove it
    # when it actually exists
    myinfo = root.find("myinfo")
    if myinfo is not None:
        root.remove(myinfo)

    # episodes or chapters; does not depend on the entry, so compute it once
    progress_tag = (
        "my_watched_episodes" if media_type == ListType.ANIME else "my_read_chapters"
    )

    for entry in root.findall(media_type.value):
        # iterate over a snapshot of the children so that removing one
        # cannot disturb the iteration
        for attr in list(entry):
            if attr.tag in REMOVE_ATTRS:
                entry.remove(attr)
        if not filter_activity:
            continue

        # if this has some sort of activity
        has_score = field_text(entry, "my_score") not in ("", "0")
        start_date = field_text(entry, "my_start_date")
        has_start_date = bool(start_date) and not start_date.startswith("0000")
        completed = field_text(entry, "my_status") == "Completed"
        has_progress = field_text(entry, progress_tag) not in ("", "0")

        if not (has_start_date or has_score or completed or has_progress):
            root.remove(entry)

    return ET.tostring(root, encoding="unicode"), len(root.findall(media_type.value))
49+
50+
51+
def extract_xml_range(xml_data: str, media_type: ListType, in_range: range) -> str:
    """
    Re-serialize ``xml_data`` keeping only the ``media_type`` entries whose
    zero-based position (among those entries) falls inside ``in_range``.
    """
    root = ET.fromstring(xml_data)
    # collect everything outside the requested window first, then drop it
    outside_window = [
        node
        for position, node in enumerate(root.findall(media_type.value))
        if position not in in_range
    ]
    for node in outside_window:
        root.remove(node)
    serialized = ET.tostring(root, encoding="unicode")
    return str(serialized)
57+
58+
59+
def run_type(
    xml_file: Path,
    media_type: ListType,
    chunk_size: int,
    in_dir: Path,
    filter_activity: bool,
) -> None:
    """
    Clean ``xml_file`` with ``remove_attrs`` and write it out to ``in_dir``
    as numbered files of at most ``chunk_size`` entries each.

    Files are named ``<media_type value>_NNN.xml``, numbered from 001.

    Raises ValueError if ``chunk_size`` is not a positive integer.
    """
    # a zero chunk size would divide by zero and a negative one would never
    # terminate, so reject both up front
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    cleaned_tree, element_count = remove_attrs(xml_file, media_type, filter_activity)
    m = media_type.value
    chunk_starts = range(0, element_count, chunk_size)
    for chunk_number, lower in enumerate(chunk_starts, start=1):
        # upper is exclusive and may run past element_count on the last chunk
        upper = lower + chunk_size
        target = in_dir / f"{m}_{str(chunk_number).zfill(3)}.xml"
        click.echo(f"Chunking {m} from {lower} to {upper} to {str(target)}")
        chunked_xml = extract_xml_range(cleaned_tree, media_type, range(lower, upper))
        # the serialized string carries no XML declaration, so readers will
        # assume UTF-8; write it as UTF-8 instead of the locale's encoding
        target.write_text(chunked_xml, encoding="utf-8")
75+
76+
77+
@click.command(help=__doc__)
@click.option("-u", "--username", envvar="MAL_USERNAME", required=True)
@click.option("-c", "--chunk-size", default=3000)
@click.option(
    "-d",
    "--to-dir",
    type=click.Path(dir_okay=True, file_okay=False, path_type=Path),
    default=Path("."),
    help="Directory to write chunked xml files to",
)
@click.option(
    "-r",
    "--remove-items-without-activity",
    is_flag=True,
    default=False,
    help="Removes any items which don't have activity (a score, start date, on my completed, or has some episode/chapter progress)",
)
def main(
    username: str, chunk_size: int, to_dir: Path, remove_items_without_activity: bool
) -> None:
    # Entry point: chunk the anime export, then the manga export, for the
    # given user. An export file that does not exist is reported and skipped.
    downloader = ExportDownloader(LocalDir.from_username(username))

    def chunk_or_warn(list_path: Path, list_type: ListType) -> None:
        # chunk one export file, or tell the user how to create it
        if not list_path.exists():
            print(f"{list_path} doesn't exist, run 'malexport update export' first")
            return
        run_type(
            list_path,
            list_type,
            chunk_size=chunk_size,
            in_dir=to_dir,
            filter_activity=remove_items_without_activity,
        )

    chunk_or_warn(downloader.animelist_path, ListType.ANIME)
    chunk_or_warn(downloader.mangalist_path, ListType.MANGA)
112+
113+
114+
# only run the CLI when executed as a script, not when imported
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)