@@ -738,10 +738,13 @@ def find_matched_articles(self, articles):
738
738
739
739
self .logs .write_log_in_file ("info" , f"Total matched unique articles: { len (set (matched_articles ))} ." , True )
740
740
self .logs .write_log_in_file ("info" , f"Total unmatched unique articles: { len (set (unmatched_articles ))} ." , True )
741
-
742
741
self .logs .write_log_in_file ("info" , f"Total matched article versions: { no_matched } ." , True )
743
742
self .logs .write_log_in_file ("info" , f"Total unmatched article versions: { len (self .article_non_match_info )} ." , True )
744
743
744
+ if len (set (unmatched_articles )) > 0 or len (self .article_non_match_info ) > 0 :
745
+ self .logs .write_log_in_file ("warning" , "There were unmatched articles or article versions."
746
+ + f"Check { self .curation_storage_location } for each of the unmatched items." , True )
747
+
745
748
return article_data
746
749
747
750
"""
@@ -761,7 +764,7 @@ def __can_copy_files(self, version_data):
761
764
return copy_files
762
765
763
766
"""
764
- Final process for matched articles.
767
+ Final process for matched articles. Returns True if processing succeeded, False otherwise.
765
768
"""
766
769
def __final_process (self , check_files , copy_files , check_dir , version_data , folder_name , version_no , value_pre_process ):
767
770
success = True
@@ -798,14 +801,19 @@ def __final_process(self, check_files, copy_files, check_dir, version_data, fold
798
801
self .logs .write_log_in_file ("error" ,
799
802
f"{ version_data ['id' ]} version { version_data ['version' ]} - Post-processing script failed." ,
800
803
True )
804
+ success = False
805
+ else :
806
+ success = True
801
807
else :
802
808
self .logs .write_log_in_file ("info" ,
803
809
f"No further processing for { version_data ['id' ]} version { version_data ['version' ]} due to errors." ,
804
810
True )
811
+ success = False
805
812
else :
806
813
# If the download process had any errors, delete the complete folder.
807
814
self .logs .write_log_in_file ("info" , "Download process had an error so complete folder is being deleted." , True )
808
815
self .delete_folder (check_dir )
816
+ success = False
809
817
else :
810
818
if check_files or copy_files :
811
819
if success :
@@ -815,12 +823,18 @@ def __final_process(self, check_files, copy_files, check_dir, version_data, fold
815
823
self .logs .write_log_in_file ("error" ,
816
824
f"{ version_data ['id' ]} version { version_data ['version' ]} - Post-processing script failed." ,
817
825
True )
826
+ success = False
827
+ else :
828
+ success = True
818
829
else :
819
830
self .logs .write_log_in_file ("info" ,
820
831
f"No further processing for { version_data ['id' ]} version { version_data ['version' ]} due to errors." ,
821
832
True )
833
+ success = False
822
834
else :
823
835
self .logs .write_log_in_file ("error" , "Unexpected condidion in final processing. No further actions taken." , True )
836
+ success = False
837
+ return success
824
838
825
839
"""
826
840
Called before articles processing.
@@ -839,9 +853,10 @@ def __initial_process(self):
839
853
return curation_storage_location
840
854
841
855
"""
842
- Process all articles after fetching from API.
856
+ Process all articles after fetching from API. Returns the number of successfully processed articles.
843
857
"""
844
- def process_articles (self , articles , total_file_size ):
858
+ def process_articles (self , articles ):
859
+ processed_count = 0
845
860
curation_storage_location = self .__initial_process ()
846
861
self .logs .write_log_in_file ("info" , "Finding matched articles." , True )
847
862
article_data = self .find_matched_articles (articles )
@@ -859,7 +874,7 @@ def process_articles(self, articles, total_file_size):
859
874
860
875
required_space = curation_folder_size + self .total_all_articles_file_size
861
876
862
- self .logs .write_log_in_file ("info" , f"Total size of aritcles to be processed: { self .total_all_articles_file_size } bytes" , True )
877
+ self .logs .write_log_in_file ("info" , f"Total size of articles to be processed: { self .total_all_articles_file_size } bytes" , True )
863
878
self .logs .write_log_in_file ("info" , f"Total size of the curated folders for the matched articles: { curation_folder_size } bytes" , True )
864
879
self .logs .write_log_in_file ("info" , f"Total space required: { required_space } bytes" , True )
865
880
@@ -905,19 +920,22 @@ def process_articles(self, articles, total_file_size):
905
920
self .logs .write_log_in_file ("error" , f"{ version_data ['id' ]} version { version_data ['version' ]} - "
906
921
+ "Post-processing script error found." , True )
907
922
break
923
+
908
924
# End of the check that the main folder exists in preservation storage.
909
925
# Check that the required files exist in the curation UAL_RDM folder.
910
926
self .logs .write_log_in_file ("info" , "Checking required files exist in associated curation "
911
927
+ f"folder { curation_storage_location } ." , True )
912
928
copy_files = self .__can_copy_files (version_data )
913
- self .__final_process (check_files , copy_files , check_dir , version_data , folder_name , version_no , value_pre_process )
929
+ if self .__final_process (check_files , copy_files , check_dir , version_data , folder_name , version_no , value_pre_process ):
930
+ processed_count += 1
914
931
else :
915
932
self .logs .write_log_in_file ("error" , "Pre-processing script failed. Running post-processing script." , True )
916
933
# call post process script function for each matched item.
917
934
value_post_process = self .processor .post_process_script_function ("Article" , check_dir , value_pre_process )
918
935
if (value_post_process != 0 ):
919
936
self .logs .write_log_in_file ("error" , f"{ version_data ['id' ]} version { version_data ['version' ]} - "
920
937
+ "Post-processing script failed." , True )
938
+ return processed_count
921
939
922
940
"""
923
941
Preservation and Curation directory access check while processing.
0 commit comments