@@ -46,6 +46,7 @@ class HiDream(ImageModelFoundation):
     MODEL_TYPE = ModelTypes.TRANSFORMER
     AUTOENCODER_CLASS = AutoencoderKL
     LATENT_CHANNEL_COUNT = 16
+    DEFAULT_NOISE_SCHEDULER = "flow_unipc"
     # The safe diffusers default value for LoRA training targets.
     DEFAULT_LORA_TARGET = ["to_k", "to_q", "to_v", "to_out.0"]
     # Only training the Attention blocks by default seems to help more with HiDream.
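Note on the defaults above: the target list covers only the attention projections. A minimal sketch of how such a list is typically consumed, assuming peft-style LoRA injection (the toy module, rank, and alpha below are illustrative, not this repo's actual wiring):

import torch.nn as nn
from peft import LoraConfig, get_peft_model

# Toy stand-in for a diffusers-style attention block exposing the
# targeted projections; dim, rank, and alpha are placeholder values.
class ToyAttention(nn.Module):
    def __init__(self, dim=32):
        super().__init__()
        self.to_q = nn.Linear(dim, dim)
        self.to_k = nn.Linear(dim, dim)
        self.to_v = nn.Linear(dim, dim)
        self.to_out = nn.ModuleList([nn.Linear(dim, dim)])

model = get_peft_model(
    ToyAttention(),
    LoraConfig(r=16, lora_alpha=16, target_modules=["to_k", "to_q", "to_v", "to_out.0"]),
)
model.print_trainable_parameters()  # only the LoRA adapters remain trainable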
@@ -123,7 +124,11 @@ def _load_pipeline(
         """
         active_pipelines = getattr(self, "pipelines", {})
         if pipeline_type in active_pipelines:
-            setattr(active_pipelines[pipeline_type], self.MODEL_TYPE.value, self.unwrap_model())
+            setattr(
+                active_pipelines[pipeline_type],
+                self.MODEL_TYPE.value,
+                self.unwrap_model(),
+            )
             return active_pipelines[pipeline_type]
         pipeline_kwargs = {
             "pretrained_model_name_or_path": self._model_config_path(),
@@ -187,7 +192,6 @@ def _load_pipeline(
 
         return self.pipelines[pipeline_type]
 
-
     def _format_text_embedding(self, text_embedding: torch.Tensor):
         """
         Models can optionally format the stored text embedding, eg. in a dict, or
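The docstring hints that subclasses may repack the stored embedding. One plausible shape for a model carrying several encoder outputs, purely as a hedged illustration (the tuple layout and dict keys are assumptions, not the repo's actual contract):

# Sketch: repacking a stored text embedding into a dict; the input
# layout and output keys are illustrative assumptions.
def _format_text_embedding(self, text_embedding):
    prompt_embeds, pooled_prompt_embeds = text_embedding
    return {
        "prompt_embeds": prompt_embeds,
        "pooled_prompt_embeds": pooled_prompt_embeds,
    }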
@@ -308,16 +312,16 @@ def model_predict(self, prepared_batch):
         ):
             B, C, H, W = prepared_batch["noisy_latents"].shape
             pH, pW = (
-                H // self.model.config.patch_size,
-                W // self.model.config.patch_size,
+                H // self.unwrap_model(model=self.model).config.patch_size,
+                W // self.unwrap_model(model=self.model).config.patch_size,
             )
 
             img_sizes = torch.tensor([pH, pW], dtype=torch.int64).reshape(-1)
             img_ids = torch.zeros(pH, pW, 3)
             img_ids[..., 1] = img_ids[..., 1] + torch.arange(pH)[:, None]
             img_ids[..., 2] = img_ids[..., 2] + torch.arange(pW)[None, :]
             img_ids = img_ids.reshape(pH * pW, -1)
-            img_ids_pad = torch.zeros(self.model.max_seq, 3)
+            img_ids_pad = torch.zeros(self.unwrap_model(model=self.model).max_seq, 3)
             img_ids_pad[: pH * pW, :] = img_ids
 
             img_sizes = img_sizes.unsqueeze(0).to(
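For a concrete feel of the id grid built in this hunk: channel 1 enumerates patch rows, channel 2 patch columns, and the result is zero-padded out to max_seq. A standalone repro with toy sizes (the max_seq value is illustrative):

import torch

pH, pW, max_seq = 2, 2, 4096  # toy grid; max_seq is illustrative

img_ids = torch.zeros(pH, pW, 3)
img_ids[..., 1] += torch.arange(pH)[:, None]  # channel 1: patch row index
img_ids[..., 2] += torch.arange(pW)[None, :]  # channel 2: patch column index
img_ids = img_ids.reshape(pH * pW, -1)
# rows are now [0,0,0], [0,0,1], [0,1,0], [0,1,1]

img_ids_pad = torch.zeros(max_seq, 3)  # all-zero rows act as padding ids
img_ids_pad[: pH * pW, :] = img_ids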
@@ -334,10 +338,15 @@ def model_predict(self, prepared_batch):
         latent_model_input = prepared_batch["noisy_latents"]
         if latent_model_input.shape[-2] != latent_model_input.shape[-1]:
             B, C, H, W = latent_model_input.shape
-            patch_size = self.model.config.patch_size
+            patch_size = self.unwrap_model(model=self.model).config.patch_size
             pH, pW = H // patch_size, W // patch_size
             out = torch.zeros(
-                (B, C, self.model.max_seq, patch_size * patch_size),
+                (
+                    B,
+                    C,
+                    self.unwrap_model(model=self.model).max_seq,
+                    patch_size * patch_size,
+                ),
                 dtype=latent_model_input.dtype,
                 device=latent_model_input.device,
             )
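The zero buffer above receives the patchified non-square latent; every sequence slot past pH * pW stays zero-padded up to max_seq. A minimal standalone sketch of that packing step under assumed shapes (the reshape/permute mirrors the usual patchify layout, but is an illustration, not the repo's exact code):

import torch

B, C, H, W = 1, 16, 64, 32     # non-square latent (H != W)
patch_size, max_seq = 2, 4096  # max_seq is illustrative
pH, pW = H // patch_size, W // patch_size

latents = torch.randn(B, C, H, W)
out = torch.zeros(B, C, max_seq, patch_size * patch_size)

# (B, C, H, W) -> (B, C, pH*pW, patch_size**2): one row per spatial patch.
patches = (
    latents.reshape(B, C, pH, patch_size, pW, patch_size)
    .permute(0, 1, 2, 4, 3, 5)
    .reshape(B, C, pH * pW, patch_size * patch_size)
)
out[:, :, : pH * pW] = patches  # slots beyond pH*pW remain zero padding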