Skip to content

Commit ad2e839

Browse files
authored
Merge pull request #287 from bellingcat/fix/insta_tbot_empty
Only return success for instagram_tbot_extractor.py with content.
2 parents c7c7eb0 + 144adaa commit ad2e839

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,9 @@ def download(self, item: Metadata) -> Metadata:
88 88

89 89
if message:
90 90
result.set_content(message).set_title(message[:128])
91 +
elif result.is_empty():
92 +
logger.debug(f"No media found for link {url=} for {self.name}: {message}")
93 +
return False
91 94
return result.success("insta-via-bot")
92 95

93 96
def _send_url_to_bot(self, url: str):
@@ -104,13 +107,13 @@ def _process_messages(self, chat, since_id, tmp_dir, result):
104 107
message = ""
105 108
time.sleep(3)
106 109
# media is added before text by the bot so it can be used as a stop-logic mechanism
107 -
while attempts < max(self.timeout - 3, 3) and (not message or not len(seen_media)):
110 +
while attempts < max(self.timeout - 3, 15) and (not message or not len(seen_media)):
108 111
attempts += 1
109 112
time.sleep(1)
110 113
for post in self.client.iter_messages(chat, min_id=since_id):
111 114
since_id = max(since_id, post.id)
112 115
# Skip known filler message:
113 -
if post.message == "The bot receives information through https://hikerapi.com/p/hJqpppqi":
116 +
if "The bot receives information through https://hikerapi.com/" in post.message:
114 117
continue
115 118
if post.media and post.id not in seen_media:
116 119
filename_dest = os.path.join(tmp_dir, f"{chat.id}_{post.id}")

tests/extractors/test_instagram_tbot_extractor.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,12 @@ def test_download_invalid(extractor, metadata_sample, mocker):
68 68
assert extractor.download(metadata_sample) is False
69 69

70 70

71 +
def test_fails_with_empty_response(extractor, metadata_sample, mocker):
72 +
mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
73 +
mocker.patch.object(extractor, "_process_messages", return_value="")
74 +
assert extractor.download(metadata_sample) is False
75 +
76 +
71 77
@pytest.mark.skip(reason="Requires authentication.")
72 78
class TestInstagramTbotExtractorReal(TestExtractorBase):
73 79
# To run these tests set the TELEGRAM_API_ID and TELEGRAM_API_HASH environment variables, and ensure the session file exists.

0 commit comments

Comments
 (0)