
Commit 3e984fd

Precreate certain outputs for upload 2.0 API.
Trying to improve the user experience of the rule-based uploader by placing HDAs and HDCAs in the history at the outset, so the history panel can poll them and we can turn them red if the upload fails. From Marius' PR review:

> I can see that a job launched in my logs, but it failed and there were no visual indications of this in the UI

Not every HDA can be pre-created; datasets read from a zip file, for example, are still created on the backend. Likewise, HDCAs that don't define a collection type up front cannot be pre-created (for instance when the type is inferred from a folder structure). Library items aren't pre-created at all in this commit. There is room to pre-create more, but this is an atomic commit as it stands and it should improve the user experience of the rule-based uploader considerably.
1 parent 579d643 commit 3e984fd
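For orientation, here is a minimal sketch (not taken from the commit; URLs, paths and values are made up) of the kind of fetch request targets that exercise the new pre-creation paths in lib/galaxy/tools/actions/upload.py below: an "hdas" destination pre-creates one HDA per element, deriving a missing name from the URL or path basename, while an "hdca" destination is only pre-created when collection_type and name are supplied up front.

targets = [
    {
        # Each element becomes a pre-created HDA the history panel can poll.
        "destination": {"type": "hdas"},
        "elements": [
            {"src": "url", "url": "https://example.org/1.fastqsanger.gz", "ext": "fastqsanger.gz"},
            {"src": "path", "path": "/data/staging/2.bed", "dbkey": "hg19"},
        ],
    },
    {
        # Pre-created HDCA; without collection_type and name the backend skips
        # pre-creation and the collection is created at collection time instead.
        "destination": {"type": "hdca"},
        "collection_type": "list",
        "name": "cool collection",
        "elements": [
            {"src": "url", "url": "https://example.org/a.txt", "name": "a"},
        ],
    },
]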

File tree

6 files changed: +142 −34 lines changed

lib/galaxy/tools/actions/upload.py

Lines changed: 64 additions & 1 deletion
@@ -1,9 +1,12 @@
 import json
+import os
 import logging
 
+from galaxy.dataset_collections.structure import UnitializedTree
 from galaxy.exceptions import RequestParameterMissingException
 from galaxy.tools.actions import upload_common
 from galaxy.util import ExecutionTimer
+from galaxy.util.bunch import Bunch
 from . import ToolAction
 
 log = logging.getLogger(__name__)
@@ -79,7 +82,67 @@ def replace_file_srcs(request_part):
 
         replace_file_srcs(request)
 
+        outputs = []
+        for target in request.get("targets", []):
+            destination = target.get("destination")
+            destination_type = destination.get("type")
+            # Start by just pre-creating HDAs.
+            if destination_type == "hdas":
+                if target.get("elements_from"):
+                    # Dynamic collection required I think.
+                    continue
+                _precreate_fetched_hdas(trans, history, target, outputs)
+
+            if destination_type == "hdca":
+                _precreate_fetched_collection_instance(trans, history, target, outputs)
+
         incoming["request_json"] = json.dumps(request)
         return self._create_job(
-            trans, incoming, tool, None, [], history=history
+            trans, incoming, tool, None, outputs, history=history
         )
+
+
+def _precreate_fetched_hdas(trans, history, target, outputs):
+    for item in target.get("elements", []):
+        name = item.get("name", None)
+        if name is None:
+            src = item.get("src", None)
+            if src == "url":
+                url = item.get("url")
+                if name is None:
+                    name = url.split("/")[-1]
+            elif src == "path":
+                path = item["path"]
+                if name is None:
+                    name = os.path.basename(path)
+
+        file_type = item.get("ext", "auto")
+        dbkey = item.get("dbkey", "?")
+        uploaded_dataset = Bunch(
+            type='file', name=name, file_type=file_type, dbkey=dbkey
+        )
+        data = upload_common.new_upload(trans, '', uploaded_dataset, library_bunch=None, history=history)
+        outputs.append(data)
+        item["object_id"] = data.id
+
+
+def _precreate_fetched_collection_instance(trans, history, target, outputs):
+    collection_type = target.get("collection_type")
+    if not collection_type:
+        # Can't precreate collections of unknown type at this time.
+        return
+
+    name = target.get("name")
+    if not name:
+        return
+
+    collections_service = trans.app.dataset_collections_service
+    collection_type_description = collections_service.collection_type_descriptions.for_collection_type(collection_type)
+    structure = UnitializedTree(collection_type_description)
+    hdca = collections_service.precreate_dataset_collection_instance(
+        trans, history, name, structure=structure
+    )
+    outputs.append(hdca)
+    # Following flushed needed for an ID.
+    trans.sa_session.flush()
+    target["destination"]["object_id"] = hdca.id

lib/galaxy/tools/actions/upload_common.py

Lines changed: 25 additions & 17 deletions
@@ -384,7 +384,7 @@ def _chown(path):
     return json_file_path
 
 
-def create_job(trans, params, tool, json_file_path, data_list, folder=None, history=None, job_params=None):
+def create_job(trans, params, tool, json_file_path, outputs, folder=None, history=None, job_params=None):
     """
     Create the upload job.
     """
@@ -412,21 +412,28 @@ def create_job(trans, params, tool, json_file_path, data_list, folder=None, history=None, job_params=None):
         job.add_parameter(name, value)
     job.add_parameter('paramfile', dumps(json_file_path))
     object_store_id = None
-    for i, dataset in enumerate(data_list):
-        if folder:
-            job.add_output_library_dataset('output%i' % i, dataset)
+    for i, output_object in enumerate(outputs):
+        output_name = "output%i" % i
+        if hasattr(output_object, "collection"):
+            job.add_output_dataset_collection(output_name, output_object)
+            output_object.job = job
         else:
-            job.add_output_dataset('output%i' % i, dataset)
-        # Create an empty file immediately
-        if not dataset.dataset.external_filename:
-            dataset.dataset.object_store_id = object_store_id
-            try:
-                trans.app.object_store.create(dataset.dataset)
-            except ObjectInvalid:
-                raise Exception('Unable to create output dataset: object store is full')
-            object_store_id = dataset.dataset.object_store_id
-        trans.sa_session.add(dataset)
-        # open( dataset.file_name, "w" ).close()
+            dataset = output_object
+            if folder:
+                job.add_output_library_dataset(output_name, dataset)
+            else:
+                job.add_output_dataset(output_name, dataset)
+            # Create an empty file immediately
+            if not dataset.dataset.external_filename:
+                dataset.dataset.object_store_id = object_store_id
+                try:
+                    trans.app.object_store.create(dataset.dataset)
+                except ObjectInvalid:
+                    raise Exception('Unable to create output dataset: object store is full')
+                object_store_id = dataset.dataset.object_store_id
+
+        trans.sa_session.add(output_object)
+
     job.object_store_id = object_store_id
     job.set_state(job.states.NEW)
     job.set_handler(tool.get_job_handler(None))
@@ -440,8 +447,9 @@ def create_job(trans, params, tool, json_file_path, data_list, folder=None, history=None, job_params=None):
     trans.app.job_manager.job_queue.put(job.id, job.tool_id)
     trans.log_event("Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id)
     output = odict()
-    for i, v in enumerate(data_list):
-        output['output%i' % i] = v
+    for i, v in enumerate(outputs):
+        if not hasattr(output_object, "collection_type"):
+            output['output%i' % i] = v
     return job, output
 
 
lib/galaxy/tools/data_fetch.py

Lines changed: 3 additions & 0 deletions
@@ -98,6 +98,7 @@ def _resolve_src(item):
         dbkey = item.get("dbkey", "?")
         requested_ext = item.get("ext", "auto")
         info = item.get("info", None)
+        object_id = item.get("object_id", None)
         link_data_only = upload_config.link_data_only
         if "link_data_only" in item:
            # Allow overriding this on a per file basis.
@@ -170,6 +171,8 @@ def _resolve_src(item):
         rval = {"name": name, "filename": path, "dbkey": dbkey, "ext": ext, "link_data_only": link_data_only}
         if info is not None:
             rval["info"] = info
+        if object_id is not None:
+            rval["object_id"] = object_id
         return rval
 
     elements = elements_tree_map(_resolve_src, items)
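Taken together with the changes in upload.py and output_collect.py, the object_id threaded through here is what links a discovered file back to its pre-created output. A rough sketch of the round trip, with made-up values (not Galaxy code):

# 1. upload.py pre-creates an HDA and stamps its id on the request element.
item = {"src": "url", "url": "https://example.org/1.fastqsanger.gz", "ext": "fastqsanger.gz"}
item["object_id"] = 42  # hypothetical id of the pre-created HDA

# 2. data_fetch.py (this file) copies object_id into the per-file result it emits.
rval = {"name": "1.fastqsanger.gz", "ext": "fastqsanger.gz"}
if item.get("object_id") is not None:
    rval["object_id"] = item["object_id"]

# 3. output_collect.py reads it back (JsonCollectedDatasetMatch.object_id) and
#    fills the pre-created HDA instead of creating a new primary dataset.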

lib/galaxy/tools/parameters/output_collect.py

Lines changed: 30 additions & 12 deletions
@@ -218,13 +218,18 @@ def add_elements_to_folder(elements, library_folder):
         elif destination_type == "hdca":
             history = job.history
             assert "collection_type" in unnamed_output_dict
-            name = unnamed_output_dict.get("name", "unnamed collection")
-            collection_type = unnamed_output_dict["collection_type"]
-            collection_type_description = collections_service.collection_type_descriptions.for_collection_type(collection_type)
-            structure = UnitializedTree(collection_type_description)
-            hdca = collections_service.precreate_dataset_collection_instance(
-                trans, history, name, structure=structure
-            )
+            object_id = destination.get("object_id")
+            if object_id:
+                sa_session = tool.app.model.context
+                hdca = sa_session.query(app.model.HistoryDatasetCollectionAssociation).get(int(object_id))
+            else:
+                name = unnamed_output_dict.get("name", "unnamed collection")
+                collection_type = unnamed_output_dict["collection_type"]
+                collection_type_description = collections_service.collection_type_descriptions.for_collection_type(collection_type)
+                structure = UnitializedTree(collection_type_description)
+                hdca = collections_service.precreate_dataset_collection_instance(
+                    trans, history, name, structure=structure
+                )
             filenames = odict.odict()
 
             def add_to_discovered_files(elements, parent_identifiers=[]):
@@ -268,6 +273,12 @@ def collect_elements_for_history(elements):
                     # Create new primary dataset
                     name = fields_match.name or designation
 
+                    hda_id = discovered_file.match.object_id
+                    primary_dataset = None
+                    if hda_id:
+                        sa_session = tool.app.model.context
+                        primary_dataset = sa_session.query(app.model.HistoryDatasetAssociation).get(int(hda_id))
+
                     dataset = job_context.create_dataset(
                         ext=ext,
                         designation=designation,
@@ -276,7 +287,8 @@ def collect_elements_for_history(elements):
                         name=name,
                         filename=discovered_file.path,
                         info=info,
-                        link_data=link_data
+                        link_data=link_data,
+                        primary_data=primary_dataset,
                     )
                     dataset.raw_set_dataset_state('ok')
                     datasets.append(dataset)
@@ -447,14 +459,16 @@ def create_dataset(
             info=None,
             library_folder=None,
             link_data=False,
+            primary_data=None,
     ):
         app = self.app
         sa_session = self.sa_session
 
-        if not library_folder:
-            primary_data = _new_hda(app, sa_session, ext, designation, visible, dbkey, self.permissions)
-        else:
-            primary_data = _new_ldda(self.work_context, name, ext, visible, dbkey, library_folder)
+        if primary_data is None:
+            if not library_folder:
+                primary_data = _new_hda(app, sa_session, ext, designation, visible, dbkey, self.permissions)
+            else:
+                primary_data = _new_ldda(self.work_context, name, ext, visible, dbkey, library_folder)
 
         # Copy metadata from one of the inputs if requested.
         metadata_source = None
@@ -838,6 +852,10 @@ def visible(self):
     def link_data(self):
         return bool(self.as_dict.get("link_data_only", False))
 
+    @property
+    def object_id(self):
+        return self.as_dict.get("object_id", None)
+
 
 class RegexCollectedDatasetMatch(JsonCollectedDatasetMatch):
 
lib/galaxy/webapps/galaxy/api/_fetch_util.py

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,9 @@ def validate_and_normalize_targets(trans, payload):
     for target in targets:
         destination = _get_required_item(target, "destination", "Each target must specify a 'destination'")
         destination_type = _get_required_item(destination, "type", "Each target destination must specify a 'type'")
+        if "object_id" in destination:
+            raise RequestParameterInvalidException("object_id not allowed to appear in the request.")
+
         if destination_type not in VALID_DESTINATION_TYPES:
             template = "Invalid target destination type [%s] encountered, must be one of %s"
             msg = template % (destination_type, VALID_DESTINATION_TYPES)
@@ -63,6 +66,9 @@ def validate_and_normalize_targets(trans, payload):
     payload["check_content"] = trans.app.config.check_upload_content
 
     def check_src(item):
+        if "object_id" in item:
+            raise RequestParameterInvalidException("object_id not allowed to appear in the request.")
+
         # Normalize file:// URLs into paths.
         if item["src"] == "url" and item["url"].startswith("file://"):
             item["src"] = "path"

test/integration/test_upload_configuration_options.py

Lines changed: 14 additions & 4 deletions
@@ -419,9 +419,14 @@ def test_ftp_fetch(self):
             "destination": {"type": "hdca"},
             "elements": elements,
             "collection_type": "list",
+            "name": "cool collection",
         }
         response = self.fetch_target(target)
         self._assert_status_code_is(response, 200)
+        response_object = response.json()
+        assert "output_collections" in response_object
+        output_collections = response_object["output_collections"]
+        assert len(output_collections) == 1, response_object
         dataset = self.dataset_populator.get_history_dataset_details(self.history_id, hid=2)
         self._check_content(dataset, content)
 
@@ -787,13 +792,18 @@ def test_fetch_history_compressed_type(self):
             "history_id": self.history_id,  # TODO: Shouldn't be needed :(
             "targets": json.dumps(targets),
         }
-        self.dataset_populator.fetch(payload)
+        fetch_response = self.dataset_populator.fetch(payload)
+        self._assert_status_code_is(fetch_response, 200)
+        outputs = fetch_response.json()["outputs"]
+        assert len(outputs) == 1
+        output = outputs[0]
+        assert output["name"] == "1.fastqsanger.gz"
         contents_response = self.dataset_populator._get_contents_request(self.history_id)
        assert contents_response.status_code == 200
         contents = contents_response.json()
-        assert len(contents) == 1
-        print(contents)
-        contents[0]["extension"] == "fastqsanger.gz"
+        assert len(contents) == 1, contents
+        assert contents[0]["extension"] == "fastqsanger.gz", contents[0]
+        assert contents[0]["name"] == "1.fastqsanger.gz", contents[0]
 
     def test_fetch_recursive_archive_history(self):
         destination = {"type": "hdas"}
