Path: blob/main/examples/dreambooth/DreamBooth_Stable_Diffusion.ipynb
Kernel: Python 3 (ipykernel)
In [ ]:
#@markdown Check type of GPU and VRAM available.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
Tesla T4, 15109 MiB, 15109 MiB
Install Requirements
In [ ]:
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
!wget -q https://github.com/ShivamShrirao/diffusers/raw/main/scripts/convert_diffusers_to_original_stable_diffusion.py
%pip install -qq git+https://github.com/ShivamShrirao/diffusers
%pip install -q -U --pre triton
%pip install -q accelerate transformers ftfy bitsandbytes==0.35.0 gradio natsort safetensors xformers
In [ ]:
#@title Login to HuggingFace 🤗

#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license, and tick the checkbox if you agree. You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need an access token for the code to work.
# https://huggingface.co/settings/tokens
!mkdir -p ~/.huggingface
HUGGINGFACE_TOKEN = "" #@param {type:"string"}
!echo -n "{HUGGINGFACE_TOKEN}" > ~/.huggingface/token
Settings and run
In [ ]:
#@markdown Whether to save model weights directly to Google Drive (takes around 4-5 GB).
save_to_gdrive = False #@param {type:"boolean"}
if save_to_gdrive:
    from google.colab import drive
    drive.mount('/content/drive')

#@markdown Name/Path of the initial model.
MODEL_NAME = "runwayml/stable-diffusion-v1-5" #@param {type:"string"}

#@markdown Enter the directory name to save the model at.
OUTPUT_DIR = "stable_diffusion_weights/zwx" #@param {type:"string"}

if save_to_gdrive:
    OUTPUT_DIR = "/content/drive/MyDrive/" + OUTPUT_DIR
else:
    OUTPUT_DIR = "/content/" + OUTPUT_DIR

print(f"[*] Weights will be saved at {OUTPUT_DIR}")

!mkdir -p $OUTPUT_DIR
Start Training
Use the table below to choose the best flags for your memory and speed requirements. All rows were tested on a Tesla T4 GPU.
| fp16 | train_batch_size | gradient_accumulation_steps | gradient_checkpointing | use_8bit_adam | VRAM usage (GB) | Speed (it/s) |
|------|------------------|-----------------------------|------------------------|---------------|-----------------|--------------|
| fp16 | 1 | 1 | TRUE  | TRUE  | 9.92  | 0.93 |
| no   | 1 | 1 | TRUE  | TRUE  | 10.08 | 0.42 |
| fp16 | 2 | 1 | TRUE  | TRUE  | 10.4  | 0.66 |
| fp16 | 1 | 1 | FALSE | TRUE  | 11.17 | 1.14 |
| no   | 1 | 1 | FALSE | TRUE  | 11.17 | 0.49 |
| fp16 | 1 | 2 | TRUE  | TRUE  | 11.56 | 1.00 |
| fp16 | 2 | 1 | FALSE | TRUE  | 13.67 | 0.82 |
| fp16 | 1 | 2 | FALSE | TRUE  | 13.7  | 0.83 |
| fp16 | 1 | 1 | TRUE  | FALSE | 15.79 | 0.77 |
- Add the `--gradient_checkpointing` flag for around 9.92 GB VRAM usage (see the example command after this list).
- Remove the `--use_8bit_adam` flag for full-precision Adam. This requires 15.79 GB with `--gradient_checkpointing`, else 17.8 GB.
- Remove the `--train_text_encoder` flag to reduce memory usage further, but this degrades output quality.
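To make the table concrete, here is a sketch of the lowest-memory configuration (fp16, batch size 1, gradient checkpointing, 8-bit Adam, roughly 9.92 GB). It is the same command as the main training cell further below with `--gradient_checkpointing` added and the sampling/saving options omitted for brevity; if you use it, run it in place of that cell, after preparing the concepts and images in the next cells.
In [ ]:
# Sketch: the ~9.92 GB row from the table above.
# Same flags as the main training cell below, plus --gradient_checkpointing;
# sampling and checkpoint-saving options are omitted here for brevity.
!python3 train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
  --output_dir=$OUTPUT_DIR \
  --revision="fp16" \
  --with_prior_preservation --prior_loss_weight=1.0 \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --gradient_checkpointing \
  --mixed_precision="fp16" \
  --use_8bit_adam \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --num_class_images=50 \
  --max_train_steps=800 \
  --concepts_list="concepts_list.json"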
In [ ]:
# You can also add multiple concepts here. Try tweaking `--max_train_steps` accordingly.

concepts_list = [
    {
        "instance_prompt":   "photo of zwx dog",
        "class_prompt":      "photo of a dog",
        "instance_data_dir": "/content/data/zwx",
        "class_data_dir":    "/content/data/dog"
    },
#     {
#         "instance_prompt":   "photo of ukj person",
#         "class_prompt":      "photo of a person",
#         "instance_data_dir": "/content/data/ukj",
#         "class_data_dir":    "/content/data/person"
#     }
]

# `class_data_dir` contains regularization images
import json
import os

for c in concepts_list:
    os.makedirs(c["instance_data_dir"], exist_ok=True)

with open("concepts_list.json", "w") as f:
    json.dump(concepts_list, f, indent=4)
In [ ]:
#@markdown Upload your images by running this cell.

#@markdown OR

#@markdown You can use the file manager on the left panel to upload (drag and drop) to each `instance_data_dir` (it uploads faster). You can also upload your own class images to `class_data_dir` if you don't want to generate them with SD.
import os
from google.colab import files
import shutil

for c in concepts_list:
    print(f"Uploading instance images for `{c['instance_prompt']}`")
    uploaded = files.upload()
    for filename in uploaded.keys():
        dst_path = os.path.join(c['instance_data_dir'], filename)
        shutil.move(filename, dst_path)
In [ ]:
!python3 train_dreambooth.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --pretrained_vae_name_or_path="stabilityai/sd-vae-ft-mse" \
  --output_dir=$OUTPUT_DIR \
  --revision="fp16" \
  --with_prior_preservation --prior_loss_weight=1.0 \
  --seed=1337 \
  --resolution=512 \
  --train_batch_size=1 \
  --train_text_encoder \
  --mixed_precision="fp16" \
  --use_8bit_adam \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --num_class_images=50 \
  --sample_batch_size=4 \
  --max_train_steps=800 \
  --save_interval=10000 \
  --save_sample_prompt="photo of zwx dog" \
  --concepts_list="concepts_list.json"

# Reduce `--save_interval` to lower than `--max_train_steps` to save weights from intermediate steps.
# `--save_sample_prompt` can be the same as `--instance_prompt` to generate intermediate samples (saved along with weights in the samples directory).
In [ ]:
#@markdown Specify the weights directory to use (leave blank for latest).
WEIGHTS_DIR = "" #@param {type:"string"}
if WEIGHTS_DIR == "":
    from natsort import natsorted
    from glob import glob
    import os
    WEIGHTS_DIR = natsorted(glob(OUTPUT_DIR + os.sep + "*"))[-1]
print(f"[*] WEIGHTS_DIR={WEIGHTS_DIR}")
In [ ]:
#@markdown Run to generate a grid of preview images from the last saved weights.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

weights_folder = OUTPUT_DIR
folders = sorted([f for f in os.listdir(weights_folder) if f != "0"], key=lambda x: int(x))

row = len(folders)
col = len(os.listdir(os.path.join(weights_folder, folders[0], "samples")))
scale = 4
fig, axes = plt.subplots(row, col, figsize=(col*scale, row*scale), gridspec_kw={'hspace': 0, 'wspace': 0})

for i, folder in enumerate(folders):
    folder_path = os.path.join(weights_folder, folder)
    image_folder = os.path.join(folder_path, "samples")
    images = [f for f in os.listdir(image_folder)]

    for j, image in enumerate(images):
        if row == 1:
            currAxes = axes[j]
        else:
            currAxes = axes[i, j]
        if i == 0:
            currAxes.set_title(f"Image {j}")
        if j == 0:
            currAxes.text(-0.1, 0.5, folder, rotation=0, va='center', ha='center', transform=currAxes.transAxes)
        image_path = os.path.join(image_folder, image)
        img = mpimg.imread(image_path)
        currAxes.imshow(img, cmap='gray')
        currAxes.axis('off')

plt.tight_layout()
plt.savefig('grid.png', dpi=72)
Convert weights to ckpt to use in web UIs like AUTOMATIC1111.
In [ ]:
#@markdown Run conversion.
ckpt_path = WEIGHTS_DIR + "/model.ckpt"

half_arg = ""
#@markdown Whether to convert to fp16, takes half the space (2GB).
fp16 = True #@param {type: "boolean"}
if fp16:
    half_arg = "--half"
!python convert_diffusers_to_original_stable_diffusion.py --model_path $WEIGHTS_DIR --checkpoint_path $ckpt_path $half_arg
print(f"[*] Converted ckpt saved at {ckpt_path}")
Inference
In [ ]:
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline, DDIMScheduler
from IPython.display import display

model_path = WEIGHTS_DIR  # If you want to use a previously trained model saved in gdrive, replace this with the full path of the model in gdrive

pipe = StableDiffusionPipeline.from_pretrained(model_path, safety_checker=None, torch_dtype=torch.float16).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()
g_cuda = None
In [ ]:
#@markdown Can set random seed here for reproducibility.
g_cuda = torch.Generator(device='cuda')
seed = 52362 #@param {type:"number"}
g_cuda.manual_seed(seed)
<torch._C.Generator at 0x7f84dc8ed450>
In [ ]:
#@title Run for generating images.

prompt = "photo of zwx dog in a bucket" #@param {type:"string"}
negative_prompt = "" #@param {type:"string"}
num_samples = 4 #@param {type:"number"}
guidance_scale = 7.5 #@param {type:"number"}
num_inference_steps = 24 #@param {type:"number"}
height = 512 #@param {type:"number"}
width = 512 #@param {type:"number"}

with autocast("cuda"), torch.inference_mode():
    images = pipe(
        prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_samples,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=g_cuda
    ).images

for img in images:
    display(img)
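If you want to keep the generated images instead of only displaying them inline, here is a minimal sketch that saves the `images` list from the cell above to disk; the `/content/outputs` directory name is an arbitrary choice, not part of the original notebook.
In [ ]:
# Sketch: save the images generated by the previous cell.
# "/content/outputs" is an assumed, arbitrary output directory.
import os

out_dir = "/content/outputs"
os.makedirs(out_dir, exist_ok=True)
for idx, img in enumerate(images):
    # `images` holds the PIL images returned by the pipeline above.
    img.save(os.path.join(out_dir, f"sample_{idx:02d}.png"))
print(f"Saved {len(images)} images to {out_dir}")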
In [ ]:
#@markdown Run Gradio UI for generating images.
import gradio as gr

def inference(prompt, negative_prompt, num_samples, height=512, width=512, num_inference_steps=50, guidance_scale=7.5):
    with torch.autocast("cuda"), torch.inference_mode():
        return pipe(
            prompt,
            height=int(height),
            width=int(width),
            negative_prompt=negative_prompt,
            num_images_per_prompt=int(num_samples),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=guidance_scale,
            generator=g_cuda
        ).images

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value="photo of zwx dog in a bucket")
            negative_prompt = gr.Textbox(label="Negative Prompt", value="")
            run = gr.Button(value="Generate")
            with gr.Row():
                num_samples = gr.Number(label="Number of Samples", value=4)
                guidance_scale = gr.Number(label="Guidance Scale", value=7.5)
            with gr.Row():
                height = gr.Number(label="Height", value=512)
                width = gr.Number(label="Width", value=512)
            num_inference_steps = gr.Slider(label="Steps", value=24)
        with gr.Column():
            gallery = gr.Gallery()

    run.click(inference, inputs=[prompt, negative_prompt, num_samples, height, width, num_inference_steps, guidance_scale], outputs=gallery)

demo.launch(debug=True)
In [ ]:
#@title (Optional) Delete the diffusers and old weights and keep only the ckpt to free up drive space.

#@markdown [ ! ] Caution: only execute if you are sure you want to delete the diffusers-format weights and keep only the ckpt.
import shutil
from glob import glob
import os

for f in glob(OUTPUT_DIR + os.sep + "*"):
    if f != WEIGHTS_DIR:
        shutil.rmtree(f)
        print("Deleted", f)

for f in glob(WEIGHTS_DIR + "/*"):
    # Keep only the .ckpt and .json files (the original `or` condition here was always true).
    if not f.endswith(".ckpt") and not f.endswith(".json"):
        try:
            shutil.rmtree(f)
        except NotADirectoryError:
            continue
        print("Deleted", f)
In [ ]:
#@title Free runtime memory
exit()