|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +""" |
| 4 | +A script to convert my XML export into chunks, so that |
| 5 | +it can be imported to anilist without cloudflare timing out |
| 6 | +""" |
| 7 | + |
| 8 | +from functools import partial |
| 9 | +from typing import Tuple |
| 10 | +from pathlib import Path |
| 11 | + |
| 12 | +import click |
| 13 | +import lxml.etree as ET |
| 14 | +from malexport.list_type import ListType |
| 15 | +from malexport.paths import LocalDir |
| 16 | +from malexport.exporter import ExportDownloader |
| 17 | + |
# tags of the child elements that get stripped from every list entry
REMOVE_ATTRS = {"my_tags"}
| 19 | + |
| 20 | + |
def _text_of(entry, tag: str) -> str:
    """
    Return the stripped text of the ``tag`` child of ``entry``.

    A missing child or an empty element both yield an empty string.
    """
    child = entry.find(tag)
    if child is None or child.text is None:
        return ""
    return child.text.strip()


def _has_activity(entry, progress_tag: str) -> bool:
    """
    Whether ``entry`` has a score, a start date, some progress
    (read from ``progress_tag``), or is marked as Completed.
    """
    score = _text_of(entry, "my_score")
    start_date = _text_of(entry, "my_start_date")
    progress = _text_of(entry, progress_tag)
    return (
        score not in ("", "0")
        or (start_date != "" and not start_date.startswith("0000"))
        or _text_of(entry, "my_status") == "Completed"
        or progress not in ("", "0")
    )


def remove_attrs(
    xml_file: Path, media_type: ListType, filter_activity: bool
) -> Tuple[str, int]:
    """
    Parse an XML export and strip it down before it gets chunked.

    Drops the top-level ``myinfo`` element (when there is one) and, from
    every ``media_type`` entry, each child whose tag is in ``REMOVE_ATTRS``.
    If ``filter_activity`` is set, entries without any activity (no score,
    no start date, not completed, no episode/chapter progress) are removed
    from the tree as well.

    Returns the serialized tree and the number of entries left in it.
    """
    root = ET.parse(str(xml_file)).getroot()

    # a missing <myinfo> is not an error, there is just nothing to drop
    myinfo = root.find("myinfo")
    if myinfo is not None:
        root.remove(myinfo)

    entries = root.findall(media_type.value)
    for entry in entries:
        # walk a snapshot of the children: removing from the live element
        # while iterating over it is not safe
        for attr in list(entry):
            if attr.tag in REMOVE_ATTRS:
                entry.remove(attr)

    if filter_activity:
        # episodes or chapters
        progress_tag = (
            "my_watched_episodes"
            if media_type == ListType.ANIME
            else "my_read_chapters"
        )
        for entry in entries:
            if not _has_activity(entry, progress_tag):
                root.remove(entry)

    return ET.tostring(root, encoding="unicode"), len(root.findall(media_type.value))
| 49 | + |
| 50 | + |
def extract_xml_range(xml_data: str, media_type: ListType, in_range: range) -> str:
    """
    Serialize a copy of ``xml_data`` that only keeps the ``media_type``
    entries whose position (counted from 0, in document order) is in
    ``in_range``. Elements with any other tag are left untouched.
    """
    root = ET.fromstring(xml_data)
    entries = root.findall(media_type.value)
    outside = [
        entry for position, entry in enumerate(entries) if position not in in_range
    ]
    for entry in outside:
        root.remove(entry)
    serialized = ET.tostring(root, encoding="unicode")
    return str(serialized)
| 57 | + |
| 58 | + |
def run_type(
    xml_file: Path,
    media_type: ListType,
    chunk_size: int,
    in_dir: Path,
    filter_activity: bool,
) -> None:
    """
    Split one XML export into files holding at most ``chunk_size`` entries.

    The export is cleaned with ``remove_attrs`` first; each chunk is then
    written to ``in_dir`` as ``<media type>_<NNN>.xml``, numbered from 001.
    ``in_dir`` is created if it does not exist and there is something to write.

    Raises ValueError if ``chunk_size`` is not a positive number.
    """
    if chunk_size <= 0:
        # zero cannot be used to step through the list, and a negative
        # size would never reach the end of it
        raise ValueError(f"chunk_size must be a positive number, got {chunk_size}")

    cleaned_tree, element_count = remove_attrs(xml_file, media_type, filter_activity)
    m = media_type.value
    if element_count > 0:
        in_dir.mkdir(parents=True, exist_ok=True)

    for chunk_number, lower in enumerate(range(0, element_count, chunk_size), start=1):
        upper = lower + chunk_size
        target = in_dir / f"{m}_{str(chunk_number).zfill(3)}.xml"
        click.echo(f"Chunking {m} from {lower} to {upper} to {str(target)}")
        chunked_xml = extract_xml_range(cleaned_tree, media_type, range(lower, upper))
        # the chunk is serialized without an XML declaration, so it has to be
        # UTF-8 on disk regardless of the locale's default encoding
        target.write_text(chunked_xml, encoding="utf-8")
| 75 | + |
| 76 | + |
@click.command(help=__doc__)
@click.option("-u", "--username", envvar="MAL_USERNAME", required=True)
@click.option(
    "-c",
    "--chunk-size",
    # a chunk has to hold at least one entry; reject 0 and negatives up front
    type=click.IntRange(min=1),
    default=3000,
    show_default=True,
    help="Maximum number of entries per chunked xml file",
)
@click.option(
    "-d",
    "--to-dir",
    type=click.Path(dir_okay=True, file_okay=False, path_type=Path),
    default=Path("."),
    help="Directory to write chunked xml files to",
)
@click.option(
    "-r",
    "--remove-items-without-activity",
    is_flag=True,
    default=False,
    help="Removes any items which don't have activity (a score, start date, on my completed, or has some episode/chapter progress)",
)
def main(
    username: str, chunk_size: int, to_dir: Path, remove_items_without_activity: bool
) -> None:
    # Chunks the anime and the manga XML export of `username`, whichever of
    # the two exist locally; a missing export is reported and skipped.
    ex = ExportDownloader(LocalDir.from_username(username))
    run_with_opts = partial(
        run_type,
        chunk_size=chunk_size,
        in_dir=to_dir,
        filter_activity=remove_items_without_activity,
    )
    exports = (
        (ex.animelist_path, ListType.ANIME),
        (ex.mangalist_path, ListType.MANGA),
    )
    for export_path, list_type in exports:
        if export_path.exists():
            run_with_opts(export_path, list_type)
        else:
            # diagnostics go to stderr so they don't mix with the progress output
            click.echo(
                f"{export_path} doesn't exist, run 'malexport update export' first",
                err=True,
            )


if __name__ == "__main__":
    main()
0 commit comments