modelscope · hjh0119 · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025
diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -249,6 +249,11 @@ def __init__(self,
                             cache_max_entry_count=args.lmdeploy_cache_max_entry_count,
                             reload_weights=True)
                         self.infer_device = fast_infer_device
+                        from lmdeploy.turbomind.turbomind import TurboMind
+                        lmdeploy_engine = self.engine.engine.engine
+                        assert isinstance(lmdeploy_engine, TurboMind), (
+                            "Currently only LMDeploy's TurboMind backend is supported. "
+                            'The current model is incompatible - please use vLLM or PyTorch backend instead.')
                     self.engine.default_template = copy(self.template)  # Avoid thread-unsafe modifications of the mode.
             self._last_loaded_step = 0  # tag to avoid useless loading during grad accumulation