Skip to content

Commit 17c5b27

Browse files
committed
Re-engineer the datatypes registry so that it is initialized once when the Galaxy server is started, but data types can continue to be loaded throughout the Galaxy server's session (hopefully this doesn't break anything).
Add support for passing a single "imported_module" to the new load_datatypes() method in the datatypes registry. This makes it possible to import a single class module from an installed tool shed repository and pass it, together with the datatypes_conf.xml file included in that repository, to the new load_datatypes() method. In the future, multiple imported modules may be allowed. The datatypes_conf.xml file included in the repository must conform to a slightly different definition than the same-named file that comes with the distribution. This new definition will be documented in the Galaxy tool shed wiki. We now have the ability to load new data types into the Galaxy server from an installed tool shed repository without restarting the Galaxy server.
1 parent 1fb135c commit 17c5b27

File tree

12 files changed

+89
-62
lines changed

12 files changed

+89
-62
lines changed

lib/galaxy/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def __init__( self, **kwargs ):
2222
self.config.check()
2323
config.configure_logging( self.config )
2424
# Set up datatypes registry
25-
self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config )
25+
self.datatypes_registry = galaxy.datatypes.registry.Registry()
26+
self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config )
2627
galaxy.model.set_datatypes_registry( self.datatypes_registry )
2728
# Set up the tool sheds registry
2829
if os.path.isfile( self.config.tool_sheds_config ):

lib/galaxy/datatypes/registry.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class ConfigurationError( Exception ):
1212
pass
1313

1414
class Registry( object ):
15-
def __init__( self, root_dir=None, config=None ):
15+
def __init__( self ):
1616
self.log = logging.getLogger(__name__)
1717
self.log.addHandler( logging.NullHandler() )
1818
self.datatypes_by_extension = {}
@@ -27,21 +27,33 @@ def __init__( self, root_dir=None, config=None ):
2727
self.sniff_order = []
2828
self.upload_file_formats = []
2929
self.display_applications = odict() #map a display application id to a display application
30-
inherit_display_application_by_class = []
30+
self.datatype_converters_path = None
31+
self.datatype_indexers_path = None
32+
self.display_applications_path = None
33+
def load_datatypes( self, root_dir=None, config=None, imported_module=None ):
3134
if root_dir and config:
35+
inherit_display_application_by_class = []
3236
# Parse datatypes_conf.xml
3337
tree = galaxy.util.parse_xml( config )
3438
root = tree.getroot()
3539
# Load datatypes and converters from config
3640
self.log.debug( 'Loading datatypes from %s' % config )
3741
registration = root.find( 'registration' )
38-
self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) )
39-
self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) )
40-
self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) )
41-
if not os.path.isdir( self.datatype_converters_path ):
42-
raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path )
43-
if not os.path.isdir( self.datatype_indexers_path ):
44-
raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path )
42+
# The following implementation implies that only the first datatypes_conf.xml parsed will
43+
# define the various paths. This is probably ok, since we can justifiably require that the
44+
# local datatypes_conf.xml file sets the standard, and all additional datatypes_conf.xml
45+
# files installed with repositories from tool sheds must use the same paths. However, we
46+
# may discover at some future time that allowing for multiple paths is more optimal.
47+
if not self.datatype_converters_path:
48+
self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) )
49+
if not os.path.isdir( self.datatype_converters_path ):
50+
raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path )
51+
if not self.datatype_indexers_path:
52+
self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) )
53+
if not os.path.isdir( self.datatype_indexers_path ):
54+
raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path )
55+
if not self.display_applications_path:
56+
self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) )
4557
for elem in registration.findall( 'datatype' ):
4658
try:
4759
extension = elem.get( 'extension', None )
@@ -55,11 +67,14 @@ def __init__( self, root_dir=None, config=None ):
5567
fields = dtype.split( ':' )
5668
datatype_module = fields[0]
5769
datatype_class_name = fields[1]
58-
fields = datatype_module.split( '.' )
59-
module = __import__( fields.pop(0) )
60-
for mod in fields:
61-
module = getattr( module, mod )
62-
datatype_class = getattr( module, datatype_class_name )
70+
if imported_module:
71+
datatype_class = getattr( imported_module, datatype_class_name )
72+
else:
73+
fields = datatype_module.split( '.' )
74+
module = __import__( fields.pop(0) )
75+
for mod in fields:
76+
module = getattr( module, mod )
77+
datatype_class = getattr( module, datatype_class_name )
6378
elif type_extension:
6479
datatype_class = self.datatypes_by_extension[type_extension].__class__
6580
if make_subclass:
@@ -252,10 +267,8 @@ def append_to_sniff_order():
252267
if not included:
253268
self.sniff_order.append(datatype)
254269
append_to_sniff_order()
255-
256270
def get_available_tracks(self):
257271
return self.available_tracks
258-
259272
def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ):
260273
"""Returns a mimetype based on an extension"""
261274
try:
@@ -265,15 +278,13 @@ def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ):
265278
mimetype = default
266279
self.log.warning('unknown mimetype in data factory %s' % ext)
267280
return mimetype
268-
269281
def get_datatype_by_extension(self, ext ):
270282
"""Returns a datatype based on an extension"""
271283
try:
272284
builder = self.datatypes_by_extension[ext]
273285
except KeyError:
274286
builder = data.Text()
275287
return builder
276-
277288
def change_datatype(self, data, ext, set_meta = True ):
278289
data.extension = ext
279290
# call init_meta and copy metadata from itself. The datatype
@@ -287,15 +298,13 @@ def change_datatype(self, data, ext, set_meta = True ):
287298
data.set_meta( overwrite = False )
288299
data.set_peek()
289300
return data
290-
291301
def old_change_datatype(self, data, ext):
292302
"""Creates and returns a new datatype based on an existing data and an extension"""
293303
newdata = factory(ext)(id=data.id)
294304
for key, value in data.__dict__.items():
295305
setattr(newdata, key, value)
296306
newdata.ext = ext
297307
return newdata
298-
299308
def load_datatype_converters( self, toolbox ):
300309
"""Adds datatype converters from self.converters to the calling app's toolbox"""
301310
for elem in self.converters:
@@ -312,7 +321,6 @@ def load_datatype_converters( self, toolbox ):
312321
self.log.debug( "Loaded converter: %s", converter.id )
313322
except:
314323
self.log.exception( "error reading converter from path: %s" % converter_path )
315-
316324
def load_external_metadata_tool( self, toolbox ):
317325
"""Adds a tool which is used to set external metadata"""
318326
#we need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated tool.
@@ -337,7 +345,6 @@ def load_external_metadata_tool( self, toolbox ):
337345
toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
338346
self.set_external_metadata_tool = set_meta_tool
339347
self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
340-
341348
def load_datatype_indexers( self, toolbox ):
342349
"""Adds indexers from self.indexers to the toolbox from app"""
343350
for elem in self.indexers:
@@ -347,7 +354,6 @@ def load_datatype_indexers( self, toolbox ):
347354
toolbox.tools_by_id[indexer.id] = indexer
348355
self.datatype_indexers[datatype] = indexer
349356
self.log.debug( "Loaded indexer: %s", indexer.id )
350-
351357
def get_converters_by_datatype(self, ext):
352358
"""Returns available converters by source type"""
353359
converters = odict()
@@ -360,7 +366,6 @@ def get_converters_by_datatype(self, ext):
360366
if ext in self.datatype_converters.keys():
361367
converters.update(self.datatype_converters[ext])
362368
return converters
363-
364369
def get_indexers_by_datatype( self, ext ):
365370
"""Returns indexers based on datatype"""
366371
class_chain = list()
@@ -373,14 +378,12 @@ def get_indexers_by_datatype( self, ext ):
373378
ext2type = lambda x: self.get_datatype_by_extension(x)
374379
class_chain = sorted(class_chain, lambda x,y: issubclass(ext2type(x),ext2type(y)) and -1 or 1)
375380
return [self.datatype_indexers[x] for x in class_chain]
376-
377381
def get_converter_by_target_type(self, source_ext, target_ext):
378382
"""Returns a converter based on source and target datatypes"""
379383
converters = self.get_converters_by_datatype(source_ext)
380384
if target_ext in converters.keys():
381385
return converters[target_ext]
382386
return None
383-
384387
def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ):
385388
"""Returns ( target_ext, existing converted dataset )"""
386389
for convert_ext in self.get_converters_by_datatype( dataset.ext ):
@@ -394,10 +397,8 @@ def find_conversion_destination_for_dataset_by_extensions( self, dataset, accept
394397
ret_data = None
395398
return ( convert_ext, ret_data )
396399
return ( None, None )
397-
398400
def get_composite_extensions( self ):
399401
return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
400-
401402
def get_upload_metadata_params( self, context, group, tool ):
402403
"""Returns dict of case value:inputs for metadata conditional for upload tool"""
403404
rval = {}
@@ -413,4 +414,3 @@ def get_upload_metadata_params( self, context, group, tool ):
413414
if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
414415
rval[ 'auto' ] = rval[ 'txt' ]
415416
return rval
416-

lib/galaxy/datatypes/sniff.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ def guess_ext( fname, sniff_order=None, is_multi_byte=False ):
280280
"""
281281
if sniff_order is None:
282282
datatypes_registry = registry.Registry()
283+
datatypes_registry.load_datatypes()
283284
sniff_order = datatypes_registry.sniff_order
284285
for datatype in sniff_order:
285286
"""

lib/galaxy/model/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525

2626
log = logging.getLogger( __name__ )
2727

28-
datatypes_registry = galaxy.datatypes.registry.Registry() #Default Value Required for unit tests
28+
datatypes_registry = galaxy.datatypes.registry.Registry()
29+
# Default Value Required for unit tests
30+
datatypes_registry.load_datatypes()
2931

3032
class NoConverterException(Exception):
3133
def __init__(self, value):

lib/galaxy/tools/parameters/basic.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1307,7 +1307,9 @@ def __init__( self, tool, elem ):
13071307
if tool is None:
13081308
#This occurs for things such as unit tests
13091309
import galaxy.datatypes.registry
1310-
formats.append( galaxy.datatypes.registry.Registry().get_datatype_by_extension( extension.lower() ).__class__ )
1310+
datatypes_registry = galaxy.datatypes.registry.Registry()
1311+
datatypes_registry.load_datatypes()
1312+
formats.append( datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ )
13111313
else:
13121314
formats.append( tool.app.datatypes_registry.get_datatype_by_extension( extension.lower() ).__class__ )
13131315
self.formats = tuple( formats )

lib/galaxy/util/none_like.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ class NoneDataset( RecursiveNone ):
2020
def __init__( self, datatypes_registry = None, ext = 'data', dbkey = '?' ):
2121
self.ext = self.extension = ext
2222
self.dbkey = dbkey
23-
if datatypes_registry is None: datatypes_registry = Registry()
23+
if datatypes_registry is None:
24+
datatypes_registry = Registry()
25+
datatypes_registry.load_datatypes()
2426
self.datatype = datatypes_registry.get_datatype_by_extension( ext )
2527
self._metadata = None
2628
self.metadata = MetadataCollection( self )

lib/galaxy/web/controllers/admin.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from galaxy.web.framework.helpers import time_ago, iff, grids
55
from galaxy.tools.search import ToolBoxSearch
66
from galaxy.tools import ToolSection, json_fix
7-
from galaxy.util import inflector
7+
from galaxy.util import parse_xml, inflector
88
import logging
99
log = logging.getLogger( __name__ )
1010

@@ -874,8 +874,7 @@ def install_tool_shed_repository( self, trans, **kwd ):
874874
tmp_stderr.close()
875875
if returncode == 0:
876876
# Load data types required by tools.
877-
# TODO: uncomment the following when we're ready...
878-
#self.__load_datatypes( trans, repo_files_dir )
877+
self.__load_datatypes( trans, repo_files_dir )
879878
# Load tools and tool data files required by them.
880879
sample_files, repository_tools_tups = self.__get_repository_tools_and_sample_files( trans, tool_path, repo_files_dir )
881880
if repository_tools_tups:
@@ -898,9 +897,8 @@ def install_tool_shed_repository( self, trans, **kwd ):
898897
persisted_new_tool_section.write( new_tool_section )
899898
persisted_new_tool_section.close()
900899
# Parse the persisted tool panel section
901-
tree = ElementTree.parse( tmp_name )
900+
tree = parse_xml( tmp_name )
902901
root = tree.getroot()
903-
ElementInclude.include( root )
904902
# Load the tools in the section into the tool panel.
905903
trans.app.toolbox.load_section_tag_set( root, trans.app.toolbox.tool_panel, tool_path )
906904
# Remove the temporary file
@@ -1186,35 +1184,49 @@ def __load_datatypes( self, trans, repo_files_dir ):
11861184
datatypes_config = os.path.abspath( os.path.join( root, name ) )
11871185
break
11881186
if datatypes_config:
1187+
imported_module = None
11891188
# Parse datatypes_config.
1190-
tree = ElementTree.parse( datatypes_config )
1191-
root = tree.getroot()
1192-
ElementInclude.include( root )
1193-
datatype_files = root.find( 'datatype_files' )
1189+
tree = parse_xml( datatypes_config )
1190+
datatypes_config_root = tree.getroot()
1191+
relative_path_to_datatype_file_name = None
1192+
datatype_files = datatypes_config_root.find( 'datatype_files' )
1193+
# Currently only a single datatype_file is supported. For example:
1194+
# <datatype_files>
1195+
# <datatype_file name="gmap.py"/>
1196+
# </datatype_files>
11941197
for elem in datatype_files.findall( 'datatype_file' ):
11951198
datatype_file_name = elem.get( 'name', None )
11961199
if datatype_file_name:
11971200
# Find the file in the installed repository.
1198-
relative_path = None
11991201
for root, dirs, files in os.walk( repo_files_dir ):
12001202
if root.find( '.hg' ) < 0:
12011203
for name in files:
12021204
if name == datatype_file_name:
1203-
relative_path = os.path.join( root, name )
1205+
relative_path_to_datatype_file_name = os.path.join( root, name )
12041206
break
1205-
relative_head, relative_tail = os.path.split( relative_path )
1206-
# TODO: get the import_module by parsing the <registration><datatype> tags
1207-
if datatype_file_name.find( '.' ) > 0:
1208-
import_module = datatype_file_name.split( '.' )[ 0 ]
1209-
else:
1210-
import_module = datatype_file_name
1211-
try:
1212-
sys.path.insert( 0, relative_head )
1213-
module = __import__( import_module )
1214-
sys.path.pop( 0 )
1215-
except Exception, e:
1216-
log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) )
1217-
trans.app.datatypes_registry = galaxy.datatypes.registry.Registry( trans.app.config.root, datatypes_config )
1207+
break
1208+
if relative_path_to_datatype_file_name:
1209+
relative_head, relative_tail = os.path.split( relative_path_to_datatype_file_name )
1210+
registration = datatypes_config_root.find( 'registration' )
1211+
# Get the module by parsing the <datatype> tag.
1212+
for elem in registration.findall( 'datatype' ):
1213+
# A 'type' attribute is currently required. The attribute
1214+
# should be something like: type="gmap:GmapDB".
1215+
dtype = elem.get( 'type', None )
1216+
if dtype:
1217+
fields = dtype.split( ':' )
1218+
datatype_module = fields[0]
1219+
datatype_class_name = fields[1]
1220+
# Since we currently support only a single datatype_file,
1221+
# we have what we need.
1222+
break
1223+
try:
1224+
sys.path.insert( 0, relative_head )
1225+
imported_module = __import__( datatype_module )
1226+
sys.path.pop( 0 )
1227+
except Exception, e:
1228+
log.debug( "Exception importing datatypes code file included in installed repository: %s" % str( e ) )
1229+
trans.app.datatypes_registry.load_datatypes( root_dir=trans.app.config.root, config=datatypes_config, imported_module=imported_module )
12181230
def __get_repository_tools_and_sample_files( self, trans, tool_path, repo_files_dir ):
12191231
# The sample_files list contains all files whose name ends in .sample
12201232
sample_files = []

lib/galaxy/webapps/community/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ def __init__( self, **kwargs ):
1515
self.config.check()
1616
config.configure_logging( self.config )
1717
# Set up datatypes registry
18-
self.datatypes_registry = galaxy.datatypes.registry.Registry( self.config.root, self.config.datatypes_config )
18+
self.datatypes_registry = galaxy.datatypes.registry.Registry()
19+
self.datatypes_registry.load_datatypes( self.config.root, self.config.datatypes_config )
1920
# Determine the database url
2021
if self.config.database_connection:
2122
db_url = self.config.database_connection

scripts/functional_tests.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,9 @@ def main():
244244
else:
245245
# FIXME: This doesn't work at all now that toolbox requires an 'app' instance
246246
# (to get at datatypes, might just pass a datatype registry directly)
247-
my_app = bunch.Bunch( datatypes_registry = galaxy.datatypes.registry.Registry() )
247+
datatypes_registry = galaxy.datatypes.registry.Registry()
248+
datatypes_registry.load_datatypes()
249+
my_app = bunch.Bunch( datatypes_registry )
248250
test_toolbox.toolbox = tools.ToolBox( 'tool_conf.xml.test', 'tools', my_app )
249251

250252
# ---- Find tests ---------------------------------------------------------

0 commit comments

Comments
 (0)