Skip to content

Commit e6be28a

Browse files
committed
WIP: CWL record inputs and outputs as well as shell tools.
1 parent 4a74ad9 commit e6be28a

File tree

9 files changed

+191
-49
lines changed

9 files changed

+191
-49
lines changed

lib/galaxy/tools/cwl/parser.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"DockerRequirement",
4040
"EnvVarRequirement",
4141
"InlineJavascriptRequirement",
42+
"ShellCommandRequirement",
4243
]
4344

4445

@@ -703,6 +704,7 @@ def _simple_field_to_input_type_kwds(field, field_type=None):
703704
"double": INPUT_TYPE.INTEGER,
704705
"string": INPUT_TYPE.TEXT,
705706
"boolean": INPUT_TYPE.BOOLEAN,
707+
"record": INPUT_TYPE.DATA_COLLECTON,
706708
}
707709

708710
if field_type is None:
@@ -766,6 +768,7 @@ def _simple_field_to_output(field):
766768
BOOLEAN="boolean",
767769
SELECT="select",
768770
CONDITIONAL="conditional",
771+
DATA_COLLECTON="data_collection",
769772
)
770773

771774

@@ -848,6 +851,9 @@ def to_dict(self, itemwise=True):
848851
as_dict["value"] = "0"
849852
if self.input_type == INPUT_TYPE.FLOAT:
850853
as_dict["value"] = "0.0"
854+
elif self.input_type == INPUT_TYPE.DATA_COLLECTON:
855+
as_dict["collection_type"] = "record"
856+
851857
return as_dict
852858

853859

@@ -857,13 +863,15 @@ def to_dict(self, itemwise=True):
857863
)
858864

859865

866+
# TODO: Different subclasses - this is representing different types of things.
860867
class OutputInstance(object):
861868

862-
def __init__(self, name, output_data_type, output_type, path=None):
869+
def __init__(self, name, output_data_type, output_type, path=None, fields=None):
863870
self.name = name
864871
self.output_data_type = output_data_type
865872
self.output_type = output_type
866873
self.path = path
874+
self.fields = fields
867875

868876

869877
__all__ = (

lib/galaxy/tools/cwl/representation.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
'float': 'float',
2020
'data': 'File',
2121
'boolean': 'boolean',
22-
'text': 'text'
22+
'text': 'text',
23+
'data_collection': 'data_collection',
2324
}
2425

2526

@@ -35,15 +36,8 @@ def to_cwl_job(tool, param_dict, local_working_directory):
3536
def simple_value(input, param_dict_value, cwl_type=None):
3637
# Hmm... cwl_type isn't really the cwl type in every case,
3738
# like in the case of json for instance.
38-
if cwl_type is None:
39-
input_type = input.type
40-
cwl_type = GALAXY_TO_CWL_TYPES[input_type]
4139

42-
if cwl_type == "null":
43-
assert param_dict_value is None
44-
return None
45-
if cwl_type == "File":
46-
dataset_wrapper = param_dict_value
40+
def dataset_wrapper_to_file_json(dataset_wrapper):
4741
extra_files_path = dataset_wrapper.extra_files_path
4842
secondary_files_path = os.path.join(extra_files_path, "__secondary_files__")
4943
path = str(dataset_wrapper)
@@ -59,6 +53,17 @@ def simple_value(input, param_dict_value, cwl_type=None):
5953

6054
return {"location": path,
6155
"class": "File"}
56+
57+
if cwl_type is None:
58+
input_type = input.type
59+
cwl_type = GALAXY_TO_CWL_TYPES[input_type]
60+
61+
if cwl_type == "null":
62+
assert param_dict_value is None
63+
return None
64+
if cwl_type == "File":
65+
dataset_wrapper = param_dict_value
66+
return dataset_wrapper_to_file_json(dataset_wrapper)
6267
elif cwl_type == "integer":
6368
return int(str(param_dict_value))
6469
elif cwl_type == "long":
@@ -75,6 +80,11 @@ def simple_value(input, param_dict_value, cwl_type=None):
7580
raw_value = param_dict_value.value
7681
log.info("raw_value is %s (%s)" % (raw_value, type(raw_value)))
7782
return json.loads(raw_value)
83+
elif cwl_type == "data_collection":
84+
rval = dict() # TODO: THIS NEEDS TO BE ORDERED BUT odict not json serializable!
85+
for key, value in param_dict_value.items():
86+
rval[key] = dataset_wrapper_to_file_json(value)
87+
return rval
7888
else:
7989
return str(param_dict_value)
8090

lib/galaxy/tools/cwl/runtime_actions.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,41 @@ def handle_outputs(job_directory=None):
2424
job_proxy = load_job_proxy(job_directory, strict_cwl_validation=False)
2525
tool_working_directory = os.path.join(job_directory, "working")
2626
outputs = job_proxy.collect_outputs(tool_working_directory)
27+
28+
def handle_output_location(output, target_path):
29+
output_path = ref_resolver.uri_file_path(output["location"])
30+
if output["class"] != "File":
31+
open("galaxy.json", "w").write(json.dump({
32+
"dataset_id": job_proxy.output_id(output_name),
33+
"type": "dataset",
34+
"ext": "expression.json",
35+
}))
36+
shutil.move(output_path, target_path)
37+
for secondary_file in output.get("secondaryFiles", []):
38+
# TODO: handle nested files...
39+
secondary_file_path = ref_resolver.uri_file_path(secondary_file["location"])
40+
assert secondary_file_path.startswith(output_path)
41+
secondary_file_name = secondary_file_path[len(output_path):]
42+
secondary_files_dir = job_proxy.output_secondary_files_dir(
43+
output_name, create=True
44+
)
45+
extra_target = os.path.join(secondary_files_dir, secondary_file_name)
46+
shutil.move(
47+
secondary_file_path,
48+
extra_target,
49+
)
50+
2751
for output_name, output in outputs.items():
28-
target_path = job_proxy.output_path(output_name)
2952
if isinstance(output, dict) and "location" in output:
30-
output_path = ref_resolver.uri_file_path(output["location"])
31-
if output["class"] != "File":
32-
open("galaxy.json", "w").write(json.dump({
33-
"dataset_id": job_proxy.output_id(output_name),
34-
"type": "dataset",
35-
"ext": "expression.json",
36-
}))
37-
shutil.move(output_path, target_path)
38-
for secondary_file in output.get("secondaryFiles", []):
39-
# TODO: handle nested files...
40-
secondary_file_path = ref_resolver.uri_file_path(secondary_file["location"])
41-
assert secondary_file_path.startswith(output_path)
42-
secondary_file_name = secondary_file_path[len(output_path):]
43-
secondary_files_dir = job_proxy.output_secondary_files_dir(
44-
output_name, create=True
45-
)
46-
extra_target = os.path.join(secondary_files_dir, secondary_file_name)
47-
shutil.move(
48-
secondary_file_path,
49-
extra_target,
50-
)
53+
target_path = job_proxy.output_path(output_name)
54+
handle_output_location(output, target_path)
55+
elif isinstance(output, dict):
56+
prefix = "%s|__part__|" % output_name
57+
for record_key, record_value in output.items():
58+
record_value_output_key = "%s%s" % (prefix, record_key)
59+
target_path = job_proxy.output_path(record_value_output_key)
60+
61+
handle_output_location(record_value, target_path)
5162
else:
5263
with open(target_path, "w") as f:
5364
f.write(json.dumps(output))

lib/galaxy/tools/parser/cwl.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from .interface import ToolSource
1010
from .interface import ToolStdioExitCode
1111
from .output_actions import ToolOutputActionGroup
12+
from .output_objects import ToolOutputCollection
13+
from .output_objects import ToolOutputCollectionStructure
1214
from .output_objects import ToolOutput
1315
from .yaml import YamlInputSource
1416

@@ -106,15 +108,28 @@ def parse_input_pages(self):
106108
def parse_outputs(self, tool):
107109
output_instances = self.tool_proxy.output_instances()
108110
outputs = odict()
111+
output_collections = odict()
109112
output_defs = []
110113
for output_instance in output_instances:
111114
output_defs.append(self._parse_output(tool, output_instance))
115+
112116
# TODO: parse outputs collections
113117
for output_def in output_defs:
114-
outputs[output_def.name] = output_def
115-
return outputs, odict()
118+
if isinstance(output_def, ToolOutput):
119+
outputs[output_def.name] = output_def
120+
else:
121+
outputs[output_def.name] = output_def
122+
output_collections[output_def.name] = output_def
123+
return outputs, output_collections
116124

117125
def _parse_output(self, tool, output_instance):
126+
output_type = output_instance.output_data_type
127+
if isinstance(output_type, dict) and output_type.get("type") == "record":
128+
return self._parse_output_record(tool, output_instance)
129+
else:
130+
return self._parse_output_data(tool, output_instance)
131+
132+
def _parse_output_data(self, tool, output_instance):
118133
name = output_instance.name
119134
# TODO: handle filters, actions, change_format
120135
output = ToolOutput( name )
@@ -135,6 +150,20 @@ def _parse_output(self, tool, output_instance):
135150
output.actions = ToolOutputActionGroup( output, None )
136151
return output
137152

153+
def _parse_output_record(self, tool, output_instance):
154+
name = output_instance.name
155+
# TODO: clean output bindings and other non-structure information
156+
# from this.
157+
fields = output_instance.output_data_type.get("fields")
158+
output_collection = ToolOutputCollection(
159+
name,
160+
ToolOutputCollectionStructure(
161+
collection_type="record",
162+
fields=fields,
163+
),
164+
)
165+
return output_collection
166+
138167
def parse_requirements_and_containers(self):
139168
containers = []
140169
docker_identifier = self.tool_proxy.docker_identifier()

lib/galaxy/tools/parser/output_objects.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def __init__(
201201
collection_type_source=None,
202202
structured_like=None,
203203
dataset_collector_descriptions=None,
204+
fields=None,
204205
):
205206
self.collection_type = collection_type
206207
self.collection_type_source = collection_type_source
@@ -213,6 +214,10 @@ def __init__(
213214
raise ValueError( "Output collection types must be specify type of structured_like" )
214215
if dataset_collector_descriptions and structured_like:
215216
raise ValueError( "Cannot specify dynamic structure (discovered_datasets) and structured_like attribute." )
217+
if collection_type == "record" and fields is None:
218+
raise ValueError( "If record outputs are defined, fields must be defined as well." )
219+
if fields is not None and collection_type != "record":
220+
raise ValueError( "If fields are specified for outputs, the collection type must be record." )
216221
self.dynamic = dataset_collector_descriptions is not None
217222

218223
def collection_prototype( self, inputs, type_registry ):

lib/galaxy/tools/wrappers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,9 @@ def keys( self ):
420420
return []
421421
return self.__element_instances.keys()
422422

423+
def items( self ):
424+
return self.__element_instances.items()
425+
423426
@property
424427
def is_collection( self ):
425428
return True

test/api/test_tools_cwl.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,18 @@ def test_parse_int_tool( self ):
316316
output1 = self.dataset_populator.get_history_dataset_details( run_object.history_id, hid=2 )
317317
self.assertEquals(output1["extension"], "expression.json")
318318

319+
@skip_without_tool( "record-output" )
320+
def test_record_output( self ):
321+
run_object = self.dataset_populator.run_cwl_tool( "record-output", "test/functional/tools/cwl_tools/v1.0/record-output-job.json")
322+
result_record = run_object.output_collection(0)
323+
assert result_record["collection_type"] == "record"
324+
record_elements = result_record["elements"]
325+
first_element = record_elements[0]
326+
assert first_element["element_identifier"] == "ofoo"
327+
first_hda = first_element["object"]
328+
output1_content = self.dataset_populator.get_history_dataset_content( run_object.history_id, hid=first_hda["hid"] )
329+
assert "Call me Ishmael." in output1_content, "Expected contents of whale.txt, got [%s]" % output1_content
330+
319331
# def test_dynamic_tool_execution( self ):
320332
# workflow_tool_json = {
321333
# 'inputs': [{'inputBinding': {}, 'type': 'File', 'id': 'file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/draft3/count-lines2-wf.cwl#step1/wc/wc_file1'}],

0 commit comments

Comments
 (0)