Skip to content

Commit 67ffc55

Browse files
committed
Implement CWL filename tracking...
1 parent c3600e6 commit 67ffc55

File tree

7 files changed

+53
-19
lines changed

7 files changed

+53
-19
lines changed

lib/galaxy/jobs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1322,7 +1322,7 @@ def path_rewriter( path ):
13221322
dataset.set_peek( is_multi_byte=True )
13231323
else:
13241324
dataset.set_peek()
1325-
for context_key in ['name', 'info', 'dbkey']:
1325+
for context_key in ['name', 'info', 'dbkey', 'cwl_filename']:
13261326
if context_key in context:
13271327
context_value = context[context_key]
13281328
setattr(dataset, context_key, context_value)

lib/galaxy/managers/hdas.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,8 @@ def __init__( self, app ):
299299

300300
'annotation',
301301

302-
'api_type'
302+
'api_type',
303+
'cwl_file_name',
303304
], include_keys_from='summary' )
304305

305306
self.add_view( 'extended', [
@@ -353,7 +354,8 @@ def add_serializers( self ):
353354
# TODO: to DatasetAssociationSerializer
354355
'accessible' : lambda i, k, user=None, **c: self.manager.is_accessible( i, user ),
355356
'api_type' : lambda *a, **c: 'file',
356-
'type' : lambda *a, **c: 'file'
357+
'type' : lambda *a, **c: 'file',
358+
'cwl_file_name' : lambda i, k, **c: i.cwl_filename,
357359
})
358360

359361
def serialize( self, hda, keys, user=None, **context ):

lib/galaxy/model/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2069,6 +2069,19 @@ def has_data( self ):
20692069
"""Detects whether there is any data"""
20702070
return self.dataset.has_data()
20712071

2072+
def get_cwl_filename( self ):
    """Return the CWL filename recorded on the underlying dataset (or None)."""
    return self.dataset.cwl_filename

def set_cwl_filename( self, cwl_filename ):
    """Set the CWL filename on the underlying dataset.

    This should be a write-once property intrinsic to the underlying
    dataset for pure CWL workflows. We may wish to revisit that for
    usability longer term.

    :raises Exception: if the underlying dataset already has a
        cwl_filename set (write-once semantics).
    """
    if self.dataset.cwl_filename is not None:
        # Fixed typo in the error message: "cwlfilename" -> "cwl_filename",
        # matching the attribute name used throughout.
        raise Exception("Underlying dataset already has a cwl_filename set.")
    self.dataset.cwl_filename = cwl_filename

cwl_filename = property( get_cwl_filename, set_cwl_filename )
2084+
20722085
def get_raw_data( self ):
20732086
"""Returns the full data. To stream it open the file_name and read/write as needed"""
20742087
return self.datatype.get_raw_data( self )

lib/galaxy/model/mapping.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@
176176
Column( "object_store_id", TrimmedString( 255 ), index=True ),
177177
Column( "external_filename", TEXT ),
178178
Column( "_extra_files_path", TEXT ),
179+
Column( "cwl_filename", TEXT ),
179180
Column( 'file_size', Numeric( 15, 0 ) ),
180181
Column( 'total_size', Numeric( 15, 0 ) ),
181182
Column( 'uuid', UUIDType() ) )

lib/galaxy/model/migrate/versions/0138_cwl_state.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import datetime
55
import logging
66

7-
from sqlalchemy import Integer, Column, MetaData, Table
7+
from sqlalchemy import Integer, Column, MetaData, Table, TEXT
88
from galaxy.model.custom_types import JSONType
99

1010
now = datetime.datetime.utcnow
@@ -19,8 +19,12 @@ def upgrade(migrate_engine):
1919

2020
cwl_command_column = Column( "cwl_command_state", JSONType, default=True )
2121
cwl_command_version_column = Column( "cwl_command_state_version", Integer, default=True )
22+
23+
cwl_file_name = Column( "cwl_filename", TEXT, default=None, )
24+
2225
__add_column( cwl_command_column, "job", metadata )
2326
__add_column( cwl_command_version_column, "job", metadata )
27+
__add_column( cwl_file_name, "dataset", metadata )
2428

2529

2630
def downgrade(migrate_engine):
@@ -30,6 +34,8 @@ def downgrade(migrate_engine):
3034
__drop_column( "cwl_command_state", "job", metadata )
3135
__drop_column( "cwl_command_state_version", "job", metadata )
3236

37+
__drop_column( "cwl_filename", "dataset", metadata )
38+
3339

3440
def __add_column(column, table_name, metadata, **kwds):
3541
try:

lib/galaxy/tools/cwl/runtime_actions.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,21 @@ def handle_outputs(job_directory=None):
2727

2828
def handle_output_location(output, target_path):
2929
output_path = ref_resolver.uri_file_path(output["location"])
30-
if output["class"] != "File":
31-
open("galaxy.json", "w").write(json.dump({
32-
"dataset_id": job_proxy.output_id(output_name),
33-
"type": "dataset",
34-
"ext": "expression.json",
35-
}))
30+
with open("galaxy.json", "a+") as f:
31+
if output["class"] != "File":
32+
json.dump({
33+
"dataset_id": job_proxy.output_id(output_name),
34+
"type": "dataset",
35+
"ext": "expression.json",
36+
}, f)
37+
else:
38+
json.dump({
39+
"dataset_id": job_proxy.output_id(output_name),
40+
"type": "dataset",
41+
"cwl_filename": output["basename"],
42+
}, f)
43+
f.write("\n")
44+
3645
shutil.move(output_path, target_path)
3746
for secondary_file in output.get("secondaryFiles", []):
3847
# TODO: handle nested files...

test/api/test_tools_cwl.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,18 @@ def _get_job_stdout(self, job_id):
135135

136136
@skip_without_tool( "cat3-tool" )
137137
def test_cat3( self ):
138-
history_id = self.dataset_populator.new_history()
139-
hda1 = _dataset_to_param( self.dataset_populator.new_dataset( history_id, content='1\t2\t3' ) )
140-
inputs = {
141-
"f1": hda1,
142-
}
143-
response = self._run( "cat3-tool", history_id, inputs, assert_ok=True )
144-
output1 = response[ "outputs" ][ 0 ]
145-
output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
146-
assert output1_content == "1\t2\t3\n", output1_content
138+
with self.dataset_populator.test_history() as history_id:
139+
hda1 = _dataset_to_param( self.dataset_populator.new_dataset( history_id, content='1\t2\t3' ) )
140+
inputs = {
141+
"f1": hda1,
142+
}
143+
response = self._run( "cat3-tool", history_id, inputs, assert_ok=True )
144+
output1 = response[ "outputs" ][ 0 ]
145+
output1_details = self.dataset_populator.get_history_dataset_details( history_id, dataset=output1 )
146+
assert "cwl_file_name" in output1_details, output1_details.keys()
147+
assert output1_details["cwl_file_name"] == "output.txt", output1_details["cwl_file_name"]
148+
output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
149+
assert output1_content == "1\t2\t3\n", output1_content
147150

148151
@skip_without_tool( "sorttool" )
149152
def test_sorttool( self ):

0 commit comments

Comments (0)