From f5e37bc5b21ff05b77cdef744c62ade09ec5be67 Mon Sep 17 00:00:00 2001 From: Vixy Date: Thu, 1 Jan 2026 22:01:18 -0600 Subject: [PATCH] Fix: use s_scale=0 when IP-Adapter loaded but no face requested When IP-Adapter FaceID is initialized, it modifies the pipeline's UNet cross-attention layers. Calling raw pipeline() without face embeddings leaves these layers in a broken state, causing corrupted output. Solution: When IP-Adapter is loaded but no face_image provided, call ip_model.generate() with s_scale=0.0 and zero embeddings to properly disable face conditioning while satisfying the modified layers. --- requirements.txt | 1 + worker/generator.py | 68 ++++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index 466a932..9e15baa 100755 --- a/requirements.txt +++ b/requirements.txt @@ -28,6 +28,7 @@ insightface==0.7.3 # IP-Adapter from GitHub git+https://github.com/tencent-ailab/IP-Adapter.git +einops # Required by IP-Adapter # Utilities pydantic==2.6.0 diff --git a/worker/generator.py b/worker/generator.py index e87fc99..2fc3a0c 100755 --- a/worker/generator.py +++ b/worker/generator.py @@ -289,30 +289,52 @@ class SDXLGenerator: )[0] ) else: - # Progress callback wrapper (only for standard pipeline) - def callback_wrapper(step: int, timestep: int, latents: torch.FloatTensor): - if progress_callback: - progress = int((step / num_inference_steps) * 100) - try: - asyncio.create_task(progress_callback(progress)) - except: - pass + # Check if IP-Adapter is loaded - if so, we must use it with s_scale=0 + # to avoid corrupted output from dangling adapter layers + if self.ip_adapter_loaded: + logger.info("IP-Adapter loaded but no face requested, using s_scale=0") + # Create zero embedding (512-dim for FaceID) + zero_embed = torch.zeros((1, 512), device=self.device, dtype=torch.float16) + + image = await loop.run_in_executor( + None, + lambda: self.ip_model.generate( + prompt=prompt, + negative_prompt=negative_prompt, + faceid_embeds=zero_embed, + width=width, + height=height, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + num_samples=1, + seed=seed, + s_scale=0.0, # Disable face conditioning + )[0] + ) + else: + # Standard generation - IP-Adapter not loaded + def callback_wrapper(step: int, timestep: int, latents: torch.FloatTensor): + if progress_callback: + progress = int((step / num_inference_steps) * 100) + try: + asyncio.create_task(progress_callback(progress)) + except: + pass - # Standard generation without face lock - image = await loop.run_in_executor( - None, - lambda: self.pipeline( - prompt=prompt, - negative_prompt=negative_prompt, - width=width, - height=height, - num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, - generator=generator, - callback=callback_wrapper, - callback_steps=1 - ).images[0] - ) + image = await loop.run_in_executor( + None, + lambda: self.pipeline( + prompt=prompt, + negative_prompt=negative_prompt, + width=width, + height=height, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + generator=generator, + callback=callback_wrapper, + callback_steps=1 + ).images[0] + ) logger.info("Image generated successfully") return image