Stable Diffusion

Running Stable Diffusion v1.4 in Google Colab with the diffusers library, and visualizing how the image develops during denoising.

# Check which GPU the Colab runtime has attached
!nvidia-smi


# Start from a clean install of diffusers and its dependencies
!pip uninstall -y diffusers huggingface_hub
!pip install diffusers transformers ftfy ipywidgets torch
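
After the reinstall (Colab may need a runtime restart before the fresh packages are importable), a quick sanity check:

import torch, diffusers, transformers
print("diffusers:", diffusers.__version__)
print("transformers:", transformers.__version__)
print("CUDA available:", torch.cuda.is_available())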

import torch
from diffusers import StableDiffusionPipeline

# Load the v1-4 weights in half precision to roughly halve GPU memory use
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)


pipe = pipe.to("cuda")
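
If the model still doesn't fit on your GPU, diffusers ships a memory-saving switch; a minimal, optional sketch:

# Slice the attention computation to lower peak memory (costs a little speed)
pipe.enable_attention_slicing()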



prompt = "plane crash"
image = pipe(prompt).images[0]  # image here is in [PIL format](https://pillow.readthedocs.io/en/stable/)

# To keep the result, save it to disk:
image.save("plane_crash.png")

# or, in a notebook such as Google Colab, display it by evaluating it in a cell:
image
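
Each call draws fresh noise, so every run gives a different image. For a repeatable result you can pass a seeded generator (a small sketch; the seed value 42 is arbitrary):

generator = torch.Generator("cuda").manual_seed(42)  # fixed seed -> reproducible output
image = pipe(prompt, generator=generator).images[0]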

So far we only see the finished image. To understand how the picture emerges from noise, we can register a step-end callback, decode the intermediate latents at a few chosen steps, and plot the phases of generation side by side.

import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import matplotlib.pyplot as plt

# Load the pipeline
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

# Swap in the Euler scheduler (optional; step-end callbacks work with any scheduler)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
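# (Optional) The scheduler instance exposes the other scheduler classes this
# pipeline can be re-configured with via the diffusers `compatibles` property:
print([cls.__name__ for cls in pipe.scheduler.compatibles])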

# Define the callback to capture intermediate images
intermediate_images = []
capture_steps = [10, 25, 40]  # Steps at which to capture intermediate images

def save_intermediate_images(pipeline, step, timestep, callback_kwargs):
    """
    Step-end callback: decode the current latents to a PIL image
    at the chosen capture steps.
    """
    if step in capture_steps:
        latents = callback_kwargs["latents"]  # latents are handed to the callback in callback_kwargs
        with torch.no_grad():
            # Undo the VAE scaling factor and decode the latents back to pixel space
            decoded_output = pipeline.vae.decode(latents / pipeline.vae.config.scaling_factor)
            decoded_tensor = decoded_output.sample  # DecoderOutput -> raw image tensor
            processed_images = pipeline.image_processor.postprocess(decoded_tensor, output_type="pil")
            intermediate_images.append(processed_images[0])  # keep the first image of the batch

    # The callback must return callback_kwargs (latents unmodified here)
    return callback_kwargs

# Generate the image with the callback
prompt = "floor map of 2bhk house"
image = pipe(prompt, callback_on_step_end=save_intermediate_images, num_inference_steps=50).images[0]

# Display the phases of generation
num_images = len(intermediate_images) + 1  # Intermediate images + final output
fig, axes = plt.subplots(1, num_images, figsize=(15, 5))

# Ensure axes is iterable even for a single subplot
if num_images == 1:
    axes = [axes]

for i, img in enumerate(intermediate_images):
    axes[i].imshow(img)
    axes[i].axis("off")
    axes[i].set_title(f"Captured Step {capture_steps[i]}")

# Show the final image
axes[-1].imshow(image)
axes[-1].axis("off")
axes[-1].set_title("Final Output")
plt.show()
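
If you want to keep the comparison, the figure and the captured frames can be written to disk as well:

fig.savefig("generation_phases.png", bbox_inches="tight")
for step, img in zip(capture_steps, intermediate_images):
    img.save(f"step_{step:02d}.png")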
