@@ -305,15 +305,12 @@ async def _get_remote_media_impl(
305
305
# file_id is the ID we use to track the file locally. If we've already
306
306
# seen the file then reuse the existing ID, otherwise generate a new
307
307
# one.
308
- if media_info :
309
- file_id = media_info ["filesystem_id" ]
310
- else :
311
- file_id = random_string (24 )
312
-
313
- file_info = FileInfo (server_name , file_id )
314
308
315
309
# If we have an entry in the DB, try and look for it
316
310
if media_info :
311
+ file_id = media_info ["filesystem_id" ]
312
+ file_info = FileInfo (server_name , file_id )
313
+
317
314
if media_info ["quarantined_by" ]:
318
315
logger .info ("Media is quarantined" )
319
316
raise NotFoundError ()
@@ -324,14 +321,34 @@ async def _get_remote_media_impl(
324
321
325
322
# Failed to find the file anywhere, let's download it.
326
323
327
- media_info = await self ._download_remote_file (server_name , media_id , file_id )
324
+ try :
325
+ media_info = await self ._download_remote_file (server_name , media_id ,)
326
+ except SynapseError :
327
+ raise
328
+ except Exception as e :
329
+ # An exception may be because we downloaded media in another
330
+ # process, so let's check if we magically have the media.
331
+ media_info = await self .store .get_cached_remote_media (server_name , media_id )
332
+ if not media_info :
333
+ raise e
334
+
335
+ file_id = media_info ["filesystem_id" ]
336
+ file_info = FileInfo (server_name , file_id )
337
+
338
+ # We generate thumbnails even if another process downloaded the media
339
+ # as a) it's conceivable that the other download request dies before it
340
+ # generates thumbnails, but mainly b) we want to be sure the thumbnails
341
+ # have finished being generated before responding to the client,
342
+ # otherwise they'll request thumbnails and get a 404 if they're not
343
+ # ready yet.
344
+ await self ._generate_thumbnails (
345
+ server_name , media_id , file_id , media_info ["media_type" ]
346
+ )
328
347
329
348
responder = await self .media_storage .fetch_media (file_info )
330
349
return responder , media_info
331
350
332
- async def _download_remote_file (
333
- self , server_name : str , media_id : str , file_id : str
334
- ) -> dict :
351
+ async def _download_remote_file (self , server_name : str , media_id : str ,) -> dict :
335
352
"""Attempt to download the remote file from the given server name,
336
353
using a newly generated random file_id as the local id.
337
354
@@ -346,6 +363,8 @@ async def _download_remote_file(
346
363
The media info of the file.
347
364
"""
348
365
366
+ file_id = random_string (24 )
367
+
349
368
file_info = FileInfo (server_name = server_name , file_id = file_id )
350
369
351
370
with self .media_storage .store_into_file (file_info ) as (f , fname , finish ):
@@ -401,22 +420,32 @@ async def _download_remote_file(
401
420
402
421
await finish ()
403
422
404
- media_type = headers [b"Content-Type" ][0 ].decode ("ascii" )
405
- upload_name = get_filename_from_headers (headers )
406
- time_now_ms = self .clock .time_msec ()
423
+ media_type = headers [b"Content-Type" ][0 ].decode ("ascii" )
424
+ upload_name = get_filename_from_headers (headers )
425
+ time_now_ms = self .clock .time_msec ()
426
+
427
+ # Multiple remote media download requests can race (when using
428
+ # multiple media repos), so this may throw a constraint violation
429
+ # exception. If it does we'll delete the newly downloaded file from
430
+ # disk (as we're in the ctx manager).
431
+ #
432
+ # However: we've already called `finish()` so we may have also
433
+ # written to the storage providers. This is preferable to the
434
+ # alternative where we call `finish()` *after* this, where we could
435
+ # end up having an entry in the DB but fail to write the files to
436
+ # the storage providers.
437
+ await self .store .store_cached_remote_media (
438
+ origin = server_name ,
439
+ media_id = media_id ,
440
+ media_type = media_type ,
441
+ time_now_ms = self .clock .time_msec (),
442
+ upload_name = upload_name ,
443
+ media_length = length ,
444
+ filesystem_id = file_id ,
445
+ )
407
446
408
447
logger .info ("Stored remote media in file %r" , fname )
409
448
410
- await self .store .store_cached_remote_media (
411
- origin = server_name ,
412
- media_id = media_id ,
413
- media_type = media_type ,
414
- time_now_ms = self .clock .time_msec (),
415
- upload_name = upload_name ,
416
- media_length = length ,
417
- filesystem_id = file_id ,
418
- )
419
-
420
449
media_info = {
421
450
"media_type" : media_type ,
422
451
"media_length" : length ,
@@ -425,8 +454,6 @@ async def _download_remote_file(
425
454
"filesystem_id" : file_id ,
426
455
}
427
456
428
- await self ._generate_thumbnails (server_name , media_id , file_id , media_type )
429
-
430
457
return media_info
431
458
432
459
def _get_thumbnail_requirements (self , media_type ):
@@ -692,42 +719,60 @@ async def _generate_thumbnails(
692
719
if not t_byte_source :
693
720
continue
694
721
695
- try :
696
- file_info = FileInfo (
697
- server_name = server_name ,
698
- file_id = file_id ,
699
- thumbnail = True ,
700
- thumbnail_width = t_width ,
701
- thumbnail_height = t_height ,
702
- thumbnail_method = t_method ,
703
- thumbnail_type = t_type ,
704
- url_cache = url_cache ,
705
- )
706
-
707
- output_path = await self .media_storage .store_file (
708
- t_byte_source , file_info
709
- )
710
- finally :
711
- t_byte_source .close ()
712
-
713
- t_len = os .path .getsize (output_path )
722
+ file_info = FileInfo (
723
+ server_name = server_name ,
724
+ file_id = file_id ,
725
+ thumbnail = True ,
726
+ thumbnail_width = t_width ,
727
+ thumbnail_height = t_height ,
728
+ thumbnail_method = t_method ,
729
+ thumbnail_type = t_type ,
730
+ url_cache = url_cache ,
731
+ )
714
732
715
- # Write to database
716
- if server_name :
717
- await self .store .store_remote_media_thumbnail (
718
- server_name ,
719
- media_id ,
720
- file_id ,
721
- t_width ,
722
- t_height ,
723
- t_type ,
724
- t_method ,
725
- t_len ,
726
- )
727
- else :
728
- await self .store .store_local_thumbnail (
729
- media_id , t_width , t_height , t_type , t_method , t_len
730
- )
733
+ with self .media_storage .store_into_file (file_info ) as (f , fname , finish ):
734
+ try :
735
+ await self .media_storage .write_to_file (t_byte_source , f )
736
+ await finish ()
737
+ finally :
738
+ t_byte_source .close ()
739
+
740
+ t_len = os .path .getsize (fname )
741
+
742
+ # Write to database
743
+ if server_name :
744
+ # Multiple remote media download requests can race (when
745
+ # using multiple media repos), so this may throw a constraint
746
+ # violation exception. If it does we'll delete the newly
747
+ # generated thumbnail from disk (as we're in the ctx
748
+ # manager).
749
+ #
750
+ # However: we've already called `finish()` so we may have
751
+ # also written to the storage providers. This is preferable
752
+ # to the alternative where we call `finish()` *after* this,
753
+ # where we could end up having an entry in the DB but fail
754
+ # to write the files to the storage providers.
755
+ try :
756
+ await self .store .store_remote_media_thumbnail (
757
+ server_name ,
758
+ media_id ,
759
+ file_id ,
760
+ t_width ,
761
+ t_height ,
762
+ t_type ,
763
+ t_method ,
764
+ t_len ,
765
+ )
766
+ except Exception as e :
767
+ thumbnail_exists = await self .store .get_remote_media_thumbnail (
768
+ server_name , media_id , t_width , t_height , t_type ,
769
+ )
770
+ if not thumbnail_exists :
771
+ raise e
772
+ else :
773
+ await self .store .store_local_thumbnail (
774
+ media_id , t_width , t_height , t_type , t_method , t_len
775
+ )
731
776
732
777
return {"width" : m_width , "height" : m_height }
733
778
0 commit comments