# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    EulerAncestralDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionModelEditingPipeline,
    UNet2DConditionModel,
)
from diffusers.utils import slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


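# Disable TF32 matmuls: on Ampere+ GPUs they can change low-order bits of the
# outputs, which would break the hard-coded expected slices below.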
torch.backends.cuda.matmul.allow_tf32 = False


@skip_mps
class StableDiffusionModelEditingPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionModelEditingPipeline
    params = TEXT_TO_IMAGE_PARAMS
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS

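    # All components are tiny, randomly initialized stand-ins for the real Stable
    # Diffusion weights, so the fast tests below run on CPU in seconds.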
    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        scheduler = DDIMScheduler()
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components

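    # A fixed seed and only two inference steps keep the fast tests cheap while
    # still producing deterministic pixel values to assert against.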
    def get_dummy_inputs(self, device, seed=0):
        generator = torch.manual_seed(seed)
        inputs = {
            "prompt": "A field of roses",
            "generator": generator,
            # Setting height and width to None to prevent OOMs on CPU.
            "height": None,
            "width": None,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }
        return inputs

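    # Each test compares a 3x3 slice from a corner of the generated image against
    # hard-coded reference values; a tolerance of 1e-2 absorbs small numerical
    # drift between platforms.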
    def test_stable_diffusion_model_editing_default_case(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionModelEditingPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]
        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array(
            [0.5217179, 0.50658035, 0.5003239, 0.41109088, 0.3595158, 0.46607107, 0.5323504, 0.5335255, 0.49187922]
        )

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_model_editing_negative_prompt(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionModelEditingPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        negative_prompt = "french fries"
        output = sd_pipe(**inputs, negative_prompt=negative_prompt)
        image = output.images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array(
            [0.546259, 0.5108156, 0.50897664, 0.41931948, 0.3748669, 0.4669299, 0.5427151, 0.54561913, 0.49353]
        )

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_model_editing_euler(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = EulerAncestralDiscreteScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        )
        sd_pipe = StableDiffusionModelEditingPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array(
            [0.47106352, 0.53579676, 0.45798016, 0.514294, 0.56856745, 0.4788605, 0.54380214, 0.5046455, 0.50404465]
        )

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_model_editing_pndm(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = PNDMScheduler()
        sd_pipe = StableDiffusionModelEditingPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        # The pipeline does not support PNDM schedulers, so make sure it raises an error.
        with self.assertRaises(ValueError):
            _ = sd_pipe(**inputs).images


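# The tests below load the full CompVis/stable-diffusion-v1-4 checkpoint, so they
# are gated behind the `slow` and `require_torch_gpu` markers and only run when
# slow tests are enabled (e.g. with RUN_SLOW=1) on a CUDA-capable machine.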
@slow
@require_torch_gpu
class StableDiffusionModelEditingSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, seed=0):
        generator = torch.manual_seed(seed)
        inputs = {
            "prompt": "A field of roses",
            "generator": generator,
            "num_inference_steps": 3,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_model_editing_default(self):
        model_ckpt = "CompVis/stable-diffusion-v1-4"
        pipe = StableDiffusionModelEditingPipeline.from_pretrained(model_ckpt, safety_checker=None)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)

        expected_slice = np.array(
            [0.6749496, 0.6386453, 0.51443267, 0.66094905, 0.61921215, 0.5491332, 0.5744417, 0.58075106, 0.5174658]
        )

        assert np.abs(expected_slice - image_slice).max() < 1e-2

        # make sure image changes after editing
        pipe.edit_model("A pack of roses", "A pack of blue roses")

        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)

        assert np.abs(expected_slice - image_slice).max() > 1e-1

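    # Sequential CPU offload keeps submodules on the CPU and moves each one to the
    # GPU only for its forward pass, trading speed for a much lower peak memory
    # footprint.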
    def test_stable_diffusion_model_editing_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        model_ckpt = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
        pipe = StableDiffusionModelEditingPipeline.from_pretrained(
            model_ckpt, scheduler=scheduler, safety_checker=None
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs()
        _ = pipe(**inputs)

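        # max_memory_allocated() reports the peak allocation since the reset_* calls
        # above, i.e. the high-water mark of the offloaded run.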
        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 4.4 GB is allocated
        assert mem_bytes < 4.4 * 10**9