Stable Diffusion

A short walkthrough of image generation with Hugging Face diffusers: first a basic StableDiffusionPipeline run, then a step-end callback that captures intermediate images during denoising.
# Check that a GPU is attached
!nvidia-smi
# Reinstall to avoid version conflicts with preinstalled packages
!pip uninstall -y diffusers huggingface_hub
!pip install diffusers transformers ftfy ipywidgets torch
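Before loading the model, it is also worth confirming the environment from Python; a minimal check using standard torch and diffusers attributes:

import torch
import diffusers
print("CUDA available:", torch.cuda.is_available())
print("diffusers version:", diffusers.__version__)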
import torch
from diffusers import StableDiffusionPipeline

# Load the v1-4 weights in half precision and move the pipeline to the GPU
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to("cuda")
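# Optional: on GPUs with limited VRAM, attention slicing trades a little speed
# for lower peak memory (enable_attention_slicing is a standard diffusers
# pipeline method; skip it on larger cards)
# pipe.enable_attention_slicing()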
prompt = "plane crash"
image = pipe(prompt).images[0]  # the result is a PIL image (https://pillow.readthedocs.io/en/stable/)

# To keep the result you can save it to disk:
image.save("plane_crash.png")
# or, in a Jupyter/Colab notebook, display it directly by evaluating the variable:
image
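Generation is stochastic, so the same prompt gives a different image on every call. For reproducible output, a minimal sketch is to pass a seeded torch.Generator (the seed 42 is arbitrary):

import torch

generator = torch.Generator("cuda").manual_seed(42)  # fixed seed for repeatability
image = pipe(prompt, generator=generator).images[0]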
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import matplotlib.pyplot as plt
# Load the pipeline
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to("cuda")
# Swap in the Euler scheduler (optional; the callback works with any scheduler)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
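# Side note: every diffusers scheduler exposes the alternatives it can be
# swapped with via the standard `compatibles` property, handy for experimenting:
# print(pipe.scheduler.compatibles)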
# Define a callback that captures intermediate images at selected steps
intermediate_images = []
capture_steps = [10, 25, 40]  # step indices at which to capture intermediate images

def save_intermediate_images(pipeline, step, timestep, extra_inputs):
    """
    Callback run at the end of each denoising step; decodes the current
    latents to a PIL image whenever the step is in capture_steps.
    """
    if step in capture_steps:
        latents = extra_inputs["latents"]  # current latents, passed in by the pipeline
        with torch.no_grad():
            # Undo the VAE scaling, then decode the latents to an RGB image
            decoded_output = pipeline.vae.decode(latents / pipeline.vae.config.scaling_factor)
            decoded_tensor = decoded_output.sample  # extract the tensor from DecoderOutput
            processed_images = pipeline.image_processor.postprocess(decoded_tensor, output_type="pil")
            intermediate_images.append(processed_images[0])  # keep the first image in the batch
    # Return the inputs unmodified so the pipeline continues as normal
    return extra_inputs
# Generate the image with the callback
prompt = "floor map of 2bhk house"
image = pipe(prompt, callback_on_step_end=save_intermediate_images, num_inference_steps=50).images[0]
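# By default the pipeline only passes "latents" to the callback. A sketch using
# the documented callback_on_step_end_tensor_inputs argument to also receive
# the prompt embeddings in extra_inputs:
# image = pipe(prompt, callback_on_step_end=save_intermediate_images,
#              callback_on_step_end_tensor_inputs=["latents", "prompt_embeds"],
#              num_inference_steps=50).images[0]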
# Display the phases of generation side by side
num_images = len(intermediate_images) + 1  # intermediate images + final output
fig, axes = plt.subplots(1, num_images, figsize=(15, 5))

# Ensure axes is iterable even when there is a single subplot
if num_images == 1:
    axes = [axes]

for i, img in enumerate(intermediate_images):
    axes[i].imshow(img)
    axes[i].axis("off")
    axes[i].set_title(f"Captured Step {capture_steps[i]}")

# Show the final image in the last panel
axes[-1].imshow(image)
axes[-1].axis("off")
axes[-1].set_title("Final Output")
plt.show()
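To keep the comparison strip, standard matplotlib can write the whole figure to disk (the filename is just an example; in a script, call this before plt.show()):

fig.savefig("generation_phases.png", bbox_inches="tight")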