Adds support for module:task and updates gymnasium to >=1.0 (#2467)

kellyguo11 · web-flow · commit 9f1aa4cdefa3 · 2025-06-05T22:16:03.000-07:00
# Description

Gymnasium 1.0 introduced support for specifying `module:task`, which automatically imports the module that registers the task instead of requiring task modules to be pre-imported. This PR adds support for this feature and requires the gymnasium version to be >= 1.0.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

---------

Signed-off-by: Kelly Guo <kellyg@nvidia.com>
diff --git a/scripts/imitation_learning/isaaclab_mimic/annotate_demos.py b/scripts/imitation_learning/isaaclab_mimic/annotate_demos.py
@@ -174,13 +174,13 @@ def main():
         os.makedirs(output_dir)
 
     if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
     if env_name is None:
         raise ValueError("Task/env name was not specified nor found in the dataset.")
 
     env_cfg = parse_env_cfg(env_name, device=args_cli.device, num_envs=1)
 
-    env_cfg.env_name = args_cli.task
+    env_cfg.env_name = env_name
 
     # extract success checking function to invoke manually
     success_term = None
diff --git a/scripts/imitation_learning/isaaclab_mimic/consolidated_demo.py b/scripts/imitation_learning/isaaclab_mimic/consolidated_demo.py
@@ -366,7 +366,7 @@ def main():
 
     # get the environment name
     if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
     elif args_cli.input_file:
         # if the environment name is not specified, try to get it from the dataset file
         dataset_file_handler = HDF5DatasetFileHandler()
@@ -406,7 +406,7 @@ def main():
         env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
 
     # create environment
-    env = gym.make(env_name, cfg=env_cfg)
+    env = gym.make(args_cli.task, cfg=env_cfg)
 
     if not isinstance(env.unwrapped, ManagerBasedRLMimicEnv):
         raise ValueError("The environment should be derived from ManagerBasedRLMimicEnv")
diff --git a/scripts/imitation_learning/isaaclab_mimic/generate_dataset.py b/scripts/imitation_learning/isaaclab_mimic/generate_dataset.py
@@ -86,7 +86,10 @@ def main():
 
     # Setup output paths and get env name
     output_dir, output_file_name = setup_output_paths(args_cli.output_file)
-    env_name = args_cli.task or get_env_name_from_dataset(args_cli.input_file)
+    task_name = args_cli.task
+    if task_name:
+        task_name = args_cli.task.split(":")[-1]
+    env_name = task_name or get_env_name_from_dataset(args_cli.input_file)
 
     # Configure environment
     env_cfg, success_term = setup_env_config(
diff --git a/scripts/imitation_learning/robomimic/train.py b/scripts/imitation_learning/robomimic/train.py
@@ -359,15 +359,16 @@ def main(args: argparse.Namespace):
     if args.task is not None:
         # obtain the configuration entry point
         cfg_entry_point_key = f"robomimic_{args.algo}_cfg_entry_point"
+        task_name = args.task.split(":")[-1]
 
-        print(f"Loading configuration for task: {args.task}")
+        print(f"Loading configuration for task: {task_name}")
         print(gym.envs.registry.keys())
         print(" ")
-        cfg_entry_point_file = gym.spec(args.task).kwargs.pop(cfg_entry_point_key)
+        cfg_entry_point_file = gym.spec(task_name).kwargs.pop(cfg_entry_point_key)
         # check if entry point exists
         if cfg_entry_point_file is None:
             raise ValueError(
-                f"Could not find configuration for the environment: '{args.task}'."
+                f"Could not find configuration for the environment: '{task_name}'."
                 f" Please check that the gym registry has the entry point: '{cfg_entry_point_key}'."
             )
 
diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py
@@ -77,6 +77,7 @@
 
 def main():
     """Play with RL-Games agent."""
+    task_name = args_cli.task.split(":")[-1]
     # parse env configuration
     env_cfg = parse_env_cfg(
         args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
@@ -89,7 +90,7 @@ def main():
     print(f"[INFO] Loading experiment from directory: {log_root_path}")
     # find checkpoint
     if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("rl_games", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("rl_games", task_name)
         if not resume_path:
             print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
             return
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -71,18 +71,19 @@
 
 def main():
     """Play with RSL-RL agent."""
+    task_name = args_cli.task.split(":")[-1]
     # parse configuration
     env_cfg = parse_env_cfg(
         args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
     )
-    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
+    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(task_name, args_cli)
 
     # specify directory for logging experiments
     log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
     log_root_path = os.path.abspath(log_root_path)
     print(f"[INFO] Loading experiment from directory: {log_root_path}")
     if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("rsl_rl", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("rsl_rl", task_name)
         if not resume_path:
             print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
             return
diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py
@@ -80,12 +80,14 @@ def main():
     )
     agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
 
+    task_name = args_cli.task.split(":")[-1]
+
     # directory for logging into
-    log_root_path = os.path.join("logs", "sb3", args_cli.task)
+    log_root_path = os.path.join("logs", "sb3", task_name)
     log_root_path = os.path.abspath(log_root_path)
     # checkpoint and log_dir stuff
     if args_cli.use_pretrained_checkpoint:
-        checkpoint_path = get_published_pretrained_checkpoint("sb3", args_cli.task)
+        checkpoint_path = get_published_pretrained_checkpoint("sb3", task_name)
         if not checkpoint_path:
             print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
             return
diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py
@@ -108,22 +108,24 @@ def main():
     if args_cli.ml_framework.startswith("jax"):
         skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy"
 
+    task_name = args_cli.task.split(":")[-1]
+
     # parse configuration
     env_cfg = parse_env_cfg(
         args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
     )
     try:
-        experiment_cfg = load_cfg_from_registry(args_cli.task, f"skrl_{algorithm}_cfg_entry_point")
+        experiment_cfg = load_cfg_from_registry(task_name, f"skrl_{algorithm}_cfg_entry_point")
     except ValueError:
-        experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
+        experiment_cfg = load_cfg_from_registry(task_name, "skrl_cfg_entry_point")
 
     # specify directory for logging experiments (load checkpoint)
     log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
     log_root_path = os.path.abspath(log_root_path)
     print(f"[INFO] Loading experiment from directory: {log_root_path}")
     # get checkpoint path
     if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("skrl", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("skrl", task_name)
         if not resume_path:
             print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
             return
diff --git a/scripts/tools/record_demos.py b/scripts/tools/record_demos.py
@@ -209,7 +209,7 @@ def main():
 
     # parse configuration
     env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=1)
-    env_cfg.env_name = args_cli.task
+    env_cfg.env_name = args_cli.task.split(":")[-1]
 
     # extract success checking function to invoke in the main loop
     success_term = None
diff --git a/scripts/tools/replay_demos.py b/scripts/tools/replay_demos.py
@@ -140,7 +140,7 @@ def main():
         episode_indices_to_replay = list(range(episode_count))
 
     if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
     if env_name is None:
         raise ValueError("Task/env name was not specified nor found in the dataset.")
 
@@ -153,7 +153,7 @@ def main():
     env_cfg.terminations = {}
 
     # create environment from loaded config
-    env = gym.make(env_name, cfg=env_cfg).unwrapped
+    env = gym.make(args_cli.task, cfg=env_cfg).unwrapped
 
     teleop_interface = Se3Keyboard(pos_sensitivity=0.1, rot_sensitivity=0.1)
     teleop_interface.add_callback("N", play_cb)
diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml
@@ -1,8 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.40.1"
-
+version = "0.40.2"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst
@@ -1,11 +1,22 @@
 Changelog
 ---------
 
+0.40.2 (2025-06-05)
+~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
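+* Updated the minimum required gymnasium version to >= 1.0.
+* Added support for specifying ``module:task_name`` as the task name, so that ``gym.make`` imports the task module automatically and it no longer needs to be pre-imported.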
+
+
 0.40.1 (2025-06-02)
 ~~~~~~~~~~~~~~~~~~~
 
 Added
 ^^^^^
+
 * Added time observation functions to ~isaaclab.envs.mdp.observations module,
   :func:`~isaaclab.envs.mdp.observations.current_time_s` and :func:`~isaaclab.envs.mdp.observations.remaining_time_s`.
 
@@ -108,9 +119,6 @@ Fixed
 0.39.1 (2025-05-14)
 ~~~~~~~~~~~~~~~~~~~
 
-Added
-^^^^^
-
 * Added a new attribute :attr:`articulation_root_prim_path` to the :class:`~isaaclab.assets.ArticulationCfg` class
   to allow explicitly specifying the prim path of the articulation root.
 
diff --git a/source/isaaclab/setup.py b/source/isaaclab/setup.py
@@ -32,7 +32,7 @@
     # devices
     "hidapi==0.14.0.post2",
     # reinforcement learning
-    "gymnasium",
+    "gymnasium>=1.0",
     # procedural-generation
     "trimesh",
     "pyglet<2",
diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py
@@ -83,10 +83,10 @@ def decorator(func):
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
             # register the task to Hydra
-            env_cfg, agent_cfg = register_task_to_hydra(task_name, agent_cfg_entry_point)
+            env_cfg, agent_cfg = register_task_to_hydra(task_name.split(":")[-1], agent_cfg_entry_point)
 
             # define the new Hydra main function
-            @hydra.main(config_path=None, config_name=task_name, version_base="1.3")
+            @hydra.main(config_path=None, config_name=task_name.split(":")[-1], version_base="1.3")
             def hydra_main(hydra_env_cfg: DictConfig, env_cfg=env_cfg, agent_cfg=agent_cfg):
                 # convert to a native dictionary
                 hydra_env_cfg = OmegaConf.to_container(hydra_env_cfg, resolve=True)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py
@@ -57,7 +57,7 @@ def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | objec
         ValueError: If the entry point key is not available in the gym registry for the task.
     """
     # obtain the configuration entry point
-    cfg_entry_point = gym.spec(task_name).kwargs.get(entry_point_key)
+    cfg_entry_point = gym.spec(task_name.split(":")[-1]).kwargs.get(entry_point_key)
     # check if entry point exists
     if cfg_entry_point is None:
         raise ValueError(
@@ -122,7 +122,7 @@ def parse_env_cfg(
             environment configuration.
     """
     # load the default configuration
-    cfg = load_cfg_from_registry(task_name, "env_cfg_entry_point")
+    cfg = load_cfg_from_registry(task_name.split(":")[-1], "env_cfg_entry_point")
 
     # check that it is not a dict
     # we assume users always use a class for the configuration