Commit 6357b88

move FSDP param load/offload into sharding manager
1 parent: 89ff526

File tree: 11 files changed (+148, -133 lines)
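The commit title describes moving FSDP parameter load/offload out of the workers and into the sharding manager. That change itself is not visible in the hunks below (the sharding-manager file is among the changed files not shown here), so the following is only a minimal sketch of the pattern, assuming verl's usual context-manager style for sharding managers; the class and method bodies are illustrative, not the committed code:

    import torch
    from torch import nn


    class FSDPShardingManagerSketch:
        """Hypothetical sketch: load params to GPU on entry, offload them on exit."""

        def __init__(self, module: nn.Module):
            # in verl this would be the FSDP-wrapped actor; plain nn.Module here for brevity
            self.module = module

        def __enter__(self):
            # bring the (sharded) parameters back onto the GPU before rollout generation
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            for param in self.module.parameters():
                param.data = param.data.to(device, non_blocking=True)
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            # offload parameters to CPU so the inference engine can use the freed GPU memory
            for param in self.module.parameters():
                param.data = param.data.to("cpu", non_blocking=True)
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

With this shape, callers wrap generation in `with sharding_manager: ...` instead of invoking load/offload helpers on the worker, which is what lets the test below enable `param_offload` and still round-trip through `sleep()`/`wake_up()`.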

tests/rollout/test_vllm_multi_turn.py

Lines changed: 29 additions & 30 deletions

@@ -20,7 +20,7 @@
 from openai.types.chat.chat_completion import ChatCompletion

 from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup
-from verl.single_controller.ray.base import Worker, create_colocated_worker_cls
+from verl.single_controller.ray.base import create_colocated_worker_cls
 from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
 from verl.workers.fsdp_async_workers import AsyncActorRolloutRefWorker, AsyncLLMManager
 from verl.workers.rollout.chat_scheduler import ChatCompletionScheduler
@@ -35,20 +35,25 @@ async def test_vllm_multi_turn():
     config.actor_rollout_ref.rollout.prompt_length = 4096
     config.actor_rollout_ref.rollout.response_length = 4096

+    # test sleep/wake_up with fsdp offload
+    config.actor_rollout_ref.actor.fsdp_config.param_offload = True
+    config.actor_rollout_ref.actor.fsdp_config.optimizer_offload = True
+
     # =========================== 1. Create hybrid ActorRollout workers ===========================
     ray.init(
         runtime_env={
-            'env_vars': {
-                'TOKENIZERS_PARALLELISM': 'true',
-                'NCCL_DEBUG': 'WARN',
-                'VLLM_LOGGING_LEVEL': 'WARN',
-                'VLLM_USE_V1': '1',
+            "env_vars": {
+                "TOKENIZERS_PARALLELISM": "true",
+                "NCCL_DEBUG": "WARN",
+                "VLLM_LOGGING_LEVEL": "WARN",
+                "VLLM_USE_V1": "1",
             }
-        })
+        }
+    )
     role_worker_mapping = {
         Role.ActorRollout: ray.remote(AsyncActorRolloutRefWorker),
     }
-    global_pool_id = 'global_pool'
+    global_pool_id = "global_pool"
     resource_pool_spec = {
         global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
     }
@@ -61,20 +66,20 @@ async def test_vllm_multi_turn():

     # create actor and rollout
     resource_pool = resource_pool_manager.get_resource_pool(Role.ActorRollout)
-    actor_rollout_cls = RayClassWithInitArgs(cls=role_worker_mapping[Role.ActorRollout],
-                                             config=config.actor_rollout_ref,
-                                             role='actor_rollout')
-    resource_pool_to_cls[resource_pool]['actor_rollout'] = actor_rollout_cls
+    actor_rollout_cls = RayClassWithInitArgs(
+        cls=role_worker_mapping[Role.ActorRollout], config=config.actor_rollout_ref, role="actor_rollout"
+    )
+    resource_pool_to_cls[resource_pool]["actor_rollout"] = actor_rollout_cls

     all_wg = {}
     wg_dicts = []
     for resource_pool, class_dict in resource_pool_to_cls.items():
-        worker_dict_cls = create_colocated_worker_cls(class_dict=class_dict, worker_cls=Worker)
+        worker_dict_cls = create_colocated_worker_cls(class_dict=class_dict)
         wg_dict = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=worker_dict_cls)
         spawn_wg = wg_dict.spawn(prefix_set=class_dict.keys())
         all_wg.update(spawn_wg)
         wg_dicts.append(wg_dict)
-    actor_rollout_wg = all_wg['actor_rollout']
+    actor_rollout_wg = all_wg["actor_rollout"]
     actor_rollout_wg.init_model()

     # =========================== 2. Create AsyncLLMManager&ChatScheduler ===========================
@@ -89,6 +94,10 @@ async def test_vllm_multi_turn():
         server_addresses=async_rollout_manager.server_addresses,
     )

+    # test sleep and wake_up
+    async_rollout_manager.sleep()
+    async_rollout_manager.wake_up()
+
     # =========================== 3. Multi turn rollout ===========================
     async def callback(completions: ChatCompletion, info: Dict[str, Any]):
         messages, round = info["messages"], info["round"]
@@ -101,10 +110,7 @@ async def callback(completions: ChatCompletion, info: Dict[str, Any]):
             messages.append({"role": "user", "content": "What is your name?"})
             await async_chat_scheduler.submit_chat_completions(
                 callback=callback,
-                callback_additional_info={
-                    "messages": messages,
-                    "round": 1
-                },
+                callback_additional_info={"messages": messages, "round": 1},
                 model=model_name,
                 messages=messages,
                 extra_headers=extra_headers,
@@ -113,27 +119,20 @@ async def callback(completions: ChatCompletion, info: Dict[str, Any]):
             messages.append({"role": "user", "content": "What is your favorite color?"})
             await async_chat_scheduler.submit_chat_completions(
                 callback=callback,
-                callback_additional_info={
-                    "messages": messages,
-                    "round": 2
-                },
+                callback_additional_info={"messages": messages, "round": 2},
                 model=model_name,
                 messages=messages,
                 extra_headers=extra_headers,
             )
         else:
             print("Done!")

-    messages = [{
-        "role": "user",
-        "content": "Let's play a role playing game. Your name is Bob, your favorite color is red."
-    }]
+    messages = [
+        {"role": "user", "content": "Let's play a role playing game. Your name is Bob, your favorite color is red."}
+    ]
     await async_chat_scheduler.submit_chat_completions(
         callback=callback,
-        callback_additional_info={
-            "messages": messages,
-            "round": 0
-        },
+        callback_additional_info={"messages": messages, "round": 0},
         model=model_name,
         messages=messages,
     )
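The new `sleep()`/`wake_up()` round-trip above is the user-visible side of the offload move. A small sketch of the contract being exercised, where only the two method names and the `AsyncLLMManager` import come from the diff and the rest is assumed:

    from verl.workers.fsdp_async_workers import AsyncLLMManager  # same import as the test above


    def rollout_step(async_rollout_manager: AsyncLLMManager) -> None:
        """Hypothetical helper showing the intended sleep/wake_up lifecycle."""
        async_rollout_manager.wake_up()  # reload weights into the inference engine before generating
        try:
            ...  # submit chat completions / collect rollouts here
        finally:
            async_rollout_manager.sleep()  # release GPU memory once generation is done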

verl/single_controller/base/register_center/ray.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Dict, Tuple
+from typing import Dict

 import ray

verl/single_controller/ray/base.py

Lines changed: 44 additions & 39 deletions

@@ -13,8 +13,10 @@
 # limitations under the License.

 import logging
+import os
 import time
 from typing import Any, Dict, List, Optional, Tuple
+from unittest.mock import patch

 import ray
 from ray.experimental.state.api import get_actor
@@ -23,6 +25,7 @@
 from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy, PlacementGroupSchedulingStrategy

 from verl.single_controller.base import ClassWithInitArgs, ResourcePool, Worker, WorkerGroup
+from verl.single_controller.base.decorator import MAGIC_ATTR, Dispatch

 __all__ = ["Worker"]

@@ -300,17 +303,23 @@ def _init_with_resource_pool(self, resource_pool, ray_cls_with_init, bin_pack, d
                 elapsed = int(time.time() - start_time)
                 if elapsed % 30 == 0:
                     logging.warning(
-                        f"Waiting for register center actor {actor_name} to be ready. "
-                        f"Elapsed time: {elapsed} seconds out of {self._ray_wait_register_center_timeout} seconds."
+                        "Waiting for register center actor %s to be ready. "
+                        "Elapsed time: %s seconds out of %s seconds.",
+                        actor_name,
+                        elapsed,
+                        self._ray_wait_register_center_timeout,
                     )
                 time.sleep(1)

             if register_center_actor is None:
                 raise TimeoutError(
-                    f"Failed to get register_center_actor {actor_name} in {list_named_actors(all_namespaces=True)} "
+                    f"Failed to get register_center_actor {actor_name} "
+                    f"in {list_named_actors(all_namespaces=True)} "
                     f"for {self._ray_wait_register_center_timeout} seconds. "
-                    "Ensure that any lingering Ray resources from previous runs are cleaned up (e.g., by restarting the Ray cluster), "
-                    "or adjust the waiting time by modifying the config `trainer.ray_wait_register_center_timeout`."
+                    "Ensure that any lingering Ray resources from previous "
+                    "runs are cleaned up (e.g., by restarting the Ray cluster), "
+                    "or adjust the waiting time by modifying the config "
+                    "`trainer.ray_wait_register_center_timeout`."
                 )

             rank_zero_info = ray.get(register_center_actor.get_rank_zero_info.remote())
@@ -329,10 +338,9 @@ def from_detached(
         worker_names=None,
         ray_cls_with_init=None,
     ):
-        worker_group = cls(resource_pool=None,
-                           ray_cls_with_init=ray_cls_with_init,
-                           name_prefix=name_prefix,
-                           worker_names=worker_names)
+        worker_group = cls(
+            resource_pool=None, ray_cls_with_init=ray_cls_with_init, name_prefix=name_prefix, worker_names=worker_names
+        )
         return worker_group

     def spawn(self, prefix_set):
@@ -382,8 +390,9 @@ def execute_all_sync(self, method_name: str, *args, **kwargs):
         return ray.get(self.execute_all_async(method_name, *args, **kwargs))

     def execute_all_async(self, method_name: str, *args, **kwargs):
-        # Here, we assume that if all arguments in args and kwargs are lists, and their lengths match len(self._workers),
-        # we'll distribute each element in these lists to the corresponding worker
+        # Here, we assume that if all arguments in args and kwargs are lists,
+        # and their lengths match len(self._workers), we'll distribute each
+        # element in these lists to the corresponding worker
         # print(f"execute_all_async: method {method_name}({args}, {kwargs})")
         length = len(self._workers)
         if all(isinstance(arg, list) for arg in args) and all(isinstance(kwarg, list) for kwarg in kwargs.values()):
@@ -421,11 +430,6 @@ def world_size(self):
 with code written in separate ray.Actors.
 """

-import os
-from unittest.mock import patch
-
-from verl.single_controller.base.decorator import MAGIC_ATTR, Dispatch
-

 def _bind_workers_method_to_parent(cls, key, user_defined_cls):
     """
@@ -443,12 +447,12 @@ def _bind_workers_method_to_parent(cls, key, user_defined_cls):

         if hasattr(method, MAGIC_ATTR):

-            def generate_function(name):
+            def generate_function(name, key=key):
                 def func(self, *args, **kwargs):
                     # dispatch to the actual worker
                     return getattr(self.worker_dict[key], name)(*args, **kwargs)

-                return func
+                return func  # noqa: B023

             func = generate_function(method_name)
             # pass MAGIC_ATTR for outer worker group
@@ -457,15 +461,16 @@ def func(self, *args, **kwargs):
             try:
                 # bind direct rollout method to class without prefix
                 if attrs["dispatch_mode"] == Dispatch.DIRECT_ROLLOUT_METHOD and "rollout" in key:
-                    assert not hasattr(cls, method_name), \
+                    assert not hasattr(cls, method_name), (
                         f"conflict direct rollout method {method_name} with role {key}"
+                    )
                     setattr(cls, method_name, func)
                     print(f"bind role {key} method {method_name} to class {cls}")
                 else:
-                    method_name_with_prefix = key + '_' + method_name
+                    method_name_with_prefix = key + "_" + method_name
                     setattr(cls, method_name_with_prefix, func)
             except Exception as e:
-                raise ValueError(f"Fail to set method_name {method_name}")
+                raise ValueError(f"Fail to set method_name {method_name}") from e


 def _unwrap_ray_remote(cls):
@@ -474,32 +479,31 @@ def _unwrap_ray_remote(cls):
     return cls


-def _nearest_common_base(mros: List):
-    last_common = object
-    min_len = min([len(mro) for mro in mros]) - 1  # exclude final derived class
-
-    for i in range(min_len):
-        mro = mros[0][i]
-        for j in range(1, len(mros)):
-            if mro != mros[j][i]:
-                return last_common
-        last_common = mro
-
-    return last_common
+def _determine_fsdp_megatron_base_class(mros: List):
+    """
+    - megatron: base class should be MegatronWorker
+    - fsdp: base class should be Worker
+    """
+    for cls in mros[0]:
+        if cls.__name__ == "MegatronWorker":
+            return cls
+        if cls.__name__ == "Worker":
+            return cls
+    raise ValueError(f"Cannot determine base class for {mros}")


-def create_colocated_worker_cls(class_dict: dict[str, RayClassWithInitArgs], worker_cls: type = None):
+def create_colocated_worker_cls(class_dict: dict[str, RayClassWithInitArgs]):
     """
     This function should return a class instance that delegates the calls to every
     cls in cls_dict
     """
     cls_dict = {}
     init_args_dict = {}
-    if worker_cls is None:
-        worker_cls = _nearest_common_base(
-            [list(reversed(cls.cls.__ray_actor_class__.__mro__)) for cls in class_dict.values()])
+    worker_cls = _determine_fsdp_megatron_base_class(
+        [cls.cls.__ray_actor_class__.__mro__ for cls in class_dict.values()]
+    )
     assert issubclass(worker_cls, Worker), f"worker_cls {worker_cls} should be a subclass of Worker"
-    print(f"find nearest common base class {worker_cls}")
+    print(f"colocated worker base class {worker_cls}")

     for key, cls in class_dict.items():
         cls_dict[key] = cls.cls
@@ -515,7 +519,8 @@ def __init__(self):
         for key, user_defined_cls in cls_dict.items():
             user_defined_cls = _unwrap_ray_remote(user_defined_cls)
             # directly instantiate the class without remote
-            # in worker class, e.g. <verl.single_controller.base.worker.Worker> when DISABLE_WORKER_INIT == 1 it will return immediately
+            # in worker class, e.g. <verl.single_controller.base.worker.Worker>
+            # when DISABLE_WORKER_INIT == 1 it will return immediately
             with patch.dict(os.environ, {"DISABLE_WORKER_INIT": "1"}):
                 self.worker_dict[key] = user_defined_cls(
                     *init_args_dict[key].get("args", ()), **init_args_dict[key].get("kwargs", {})
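One fix in this file is easy to miss: `generate_function(name)` becomes `generate_function(name, key=key)`. Without the `key=key` default, every `func` generated in the loop over the colocated roles would close over the same loop variable and dispatch to whichever role was bound last (the bug flake8's B023 flags). A self-contained illustration of the late-binding pitfall and the default-argument fix:

    # Late-binding closures: all three lambdas see the loop variable's final value.
    funcs_buggy = [lambda: key for key in ("actor", "critic", "rollout")]
    print([f() for f in funcs_buggy])  # ['rollout', 'rollout', 'rollout']

    # Binding the loop variable as a default argument freezes it per iteration,
    # which is exactly what `generate_function(name, key=key)` does above.
    funcs_fixed = [lambda key=key: key for key in ("actor", "critic", "rollout")]
    print([f() for f in funcs_fixed])  # ['actor', 'critic', 'rollout']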

verl/trainer/config/generation.yaml

Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@ model:
   external_lib: null
 rollout:
   name: vllm
+  mode: "sync" # sync: LLM, async: AsyncLLM
   temperature: 1.0
   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
   top_p: 0.7

verl/trainer/config/ppo_megatron_trainer.yaml

Lines changed: 1 addition & 0 deletions

@@ -94,6 +94,7 @@ actor_rollout_ref:
     log_prob_micro_batch_size_per_gpu: null
   rollout:
     name: vllm
+    mode: "sync" # sync: LLM, async: AsyncLLM
     temperature: 1.0
     top_k: -1 # 0 for hf rollout, -1 for vllm rollout
     top_p: 1

verl/trainer/ppo/ray_trainer.py

Lines changed: 1 addition & 1 deletion

@@ -736,7 +736,7 @@ def init_workers(self):

         # create async rollout manager and request scheduler
         self.async_rollout_mode = False
-        if self.config.actor_rollout_ref.rollout.get("mode", "sync") == 'async':
+        if self.config.actor_rollout_ref.rollout.mode == "async":
             from verl.workers.fsdp_async_workers import AsyncLLMManager

             self.async_rollout_mode = True
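This simplification works because the two YAML files above now declare `mode: "sync"` as a default, so the defensive `.get` is no longer needed. A minimal sketch of the before/after lookup, assuming an OmegaConf-style config as verl uses:

    from omegaconf import OmegaConf

    cfg = OmegaConf.create({"actor_rollout_ref": {"rollout": {"name": "vllm", "mode": "sync"}}})

    # before: defensive lookup, since older configs might omit `mode`
    legacy = cfg.actor_rollout_ref.rollout.get("mode", "sync") == "async"
    # after: plain attribute access, safe now that every trainer config declares `mode`
    current = cfg.actor_rollout_ref.rollout.mode == "async"
    assert legacy == current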
