|
20 | 20 | from collections.abc import Awaitable
|
21 | 21 |
|
22 | 22 | import asyncio
|
23 |
| -import time |
24 | 23 | import aiographfix as aiograph
|
| 24 | +import re |
| 25 | +import time |
25 | 26 | from io import BytesIO
|
26 | 27 | from bs4 import BeautifulSoup
|
27 | 28 | from contextlib import suppress
|
|
40 | 41 | else:
|
41 | 42 | convert_table_to_png = None
|
42 | 43 |
|
# Regex template matching an absolute http(s) URL whose host is one of
# ``{domains}`` (a ``|``-joined alternation of already-escaped domain names)
# or a single-label subdomain of one of them.  A trailing root dot on the host
# is tolerated.  The host must be followed by the end of the string, a port
# separator (``:``), or one of ``/``, ``?``, ``#`` -- the characters that
# terminate the authority component of a URI (RFC 3986, section 3.2).
DOMAIN_PATTERN_TEMPLATE: Final[str] = r'^https?://(?:[^./]+\.)?(?:{domains})\.?(?:[/:?#]|$)'


def _compile_domain_pattern(domains: set[str]) -> re.Pattern[str]:
    """Compile a case-insensitive DOMAIN_PATTERN_TEMPLATE regex for *domains*.

    Every domain is escaped with ``re.escape`` so that dots are matched
    literally.  The domains are sorted only to make the resulting pattern text
    reproducible across runs; the alternation order does not affect matching.
    """
    return re.compile(
        DOMAIN_PATTERN_TEMPLATE.format(
            domains='|'.join(map(re.escape, sorted(domains))),
        ),
        re.I,
    )


# Image hosts that generate_page() prefixes with env.IMG_RELAY_SERVER instead
# of rewriting through construct_weserv_url().
# NOTE(review): going by the name, weserv presumably refuses these hosts -- confirm.
BLOCKED_BY_WESERV_DOMAIN: Final[set[str]] = {
    'sinaimg.cn',
    'wp.com',
}
BLOCKED_BY_WESERV_RE: Final[re.Pattern[str]] = _compile_domain_pattern(BLOCKED_BY_WESERV_DOMAIN)

# Hosts whose URLs generate_page() leaves untouched: 'wp.com' plus the host
# parts of the configured relay and weserv endpoints.  The host is whatever
# sits between '://' and the next '/', with surrounding dots stripped; an
# endpoint without '://' yields an empty string, which filter(None, ...) drops.
ALLOW_REFERER_DOMAIN: Final[set[str]] = set(filter(None, {
    'wp.com',
    env.IMG_RELAY_SERVER.partition('://')[2].partition('/')[0].strip('.'),
    env.IMAGES_WESERV_NL.partition('://')[2].partition('/')[0].strip('.'),
}))
ALLOW_REFERER_RE: Final[re.Pattern[str]] = _compile_domain_pattern(ALLOW_REFERER_DOMAIN)
| 66 | + |
43 | 67 | logger = log.getLogger('RSStT.tgraph')
|
44 | 68 |
|
45 | 69 | apis: Optional[APIs] = None
|
@@ -309,13 +333,15 @@ async def generate_page(self):
|
309 | 333 | if not isAbsoluteHttpLink(attr_content):
|
310 | 334 | tag.replaceWithChildren()
|
311 | 335 | continue
|
312 |
| - if not attr_content.startswith(env.IMG_RELAY_SERVER): |
| 336 | + if not ALLOW_REFERER_RE.match(attr_content): |
313 | 337 | if tag.name == 'video':
|
314 | 338 | attr_content = env.IMG_RELAY_SERVER + attr_content
|
315 |
| - if tag.name == 'img' and not attr_content.startswith(env.IMAGES_WESERV_NL): |
316 |
| - if attr_content.split('.', 1)[1].split('/', 1)[0] == 'sinaimg.cn': |
317 |
| - attr_content = env.IMG_RELAY_SERVER + attr_content |
318 |
| - attr_content = construct_weserv_url(attr_content) |
| 339 | + elif tag.name == 'img': |
| 340 | + attr_content = ( |
| 341 | + env.IMG_RELAY_SERVER + attr_content |
| 342 | + if BLOCKED_BY_WESERV_RE.match(attr_content) |
| 343 | + else construct_weserv_url(attr_content) |
| 344 | + ) |
319 | 345 | tag.attrs = {attr_name: attr_content}
|
320 | 346 |
|
321 | 347 | if self.feed_title:
|
|
0 commit comments