GitHub Repository: shivamshrirao/diffusers
Path: blob/main/tests/pipelines/stable_diffusion/test_stable_diffusion_panorama.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    EulerAncestralDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionPanoramaPipeline,
    UNet2DConditionModel,
)
from diffusers.utils import slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
from ...test_pipelines_common import PipelineTesterMixin

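# Disable TF32 matmuls so results are reproducible across GPU generations and
# match the expected slices hard-coded below.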
torch.backends.cuda.matmul.allow_tf32 = False


@skip_mps
class StableDiffusionPanoramaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionPanoramaPipeline
    params = TEXT_TO_IMAGE_PARAMS
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS

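    # The shared PipelineTesterMixin tests run against the dummy components and
    # inputs defined below; `params` / `batch_params` above declare which
    # __call__ arguments those tests exercise.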
    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        scheduler = DDIMScheduler()
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        generator = torch.manual_seed(seed)
        inputs = {
            "prompt": "a photo of the dolomites",
            "generator": generator,
            # Setting height and width to None to prevent OOMs on CPU.
            "height": None,
            "width": None,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }
        return inputs

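    # With `height`/`width` left as None, the pipeline falls back to the UNet's
    # native resolution scaled by the VAE downsampling factor: here
    # sample_size (32) * vae_scale_factor (2) = 64, hence the (1, 64, 64, 3)
    # shapes asserted below.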
    def test_stable_diffusion_panorama_default_case(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionPanoramaPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]
        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.5101, 0.5006, 0.4962, 0.3995, 0.3501, 0.4632, 0.5339, 0.5250, 0.4878])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_panorama_negative_prompt(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionPanoramaPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        negative_prompt = "french fries"
        output = sd_pipe(**inputs, negative_prompt=negative_prompt)
        image = output.images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.5326, 0.5009, 0.5074, 0.4133, 0.3710, 0.4640, 0.5432, 0.5429, 0.4896])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_panorama_euler(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = EulerAncestralDiscreteScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        )
        sd_pipe = StableDiffusionPanoramaPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array(
            [0.48235387, 0.5423796, 0.46016198, 0.5377287, 0.5803722, 0.4876525, 0.5515428, 0.5045897, 0.50709957]
        )

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_panorama_pndm(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = PNDMScheduler()
        sd_pipe = StableDiffusionPanoramaPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        # The panorama pipeline does not support PNDMScheduler, so calling it
        # should raise a ValueError.
        with self.assertRaises(ValueError):
            _ = sd_pipe(**inputs).images


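# For context: StableDiffusionPanoramaPipeline implements MultiDiffusion-style
# generation, denoising overlapping views of a wide latent canvas and fusing
# them at every step. A minimal stand-alone usage sketch (not executed here),
# assuming a CUDA device and access to the Hub checkpoint:
#
#     model_ckpt = "stabilityai/stable-diffusion-2-base"
#     scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
#     pipe = StableDiffusionPanoramaPipeline.from_pretrained(
#         model_ckpt, scheduler=scheduler, safety_checker=None
#     )
#     pipe = pipe.to("cuda")
#     image = pipe("a photo of the dolomites").images[0]
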
@slow
@require_torch_gpu
class StableDiffusionPanoramaSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, seed=0):
        generator = torch.manual_seed(seed)
        inputs = {
            "prompt": "a photo of the dolomites",
            "generator": generator,
            "num_inference_steps": 3,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_panorama_default(self):
        model_ckpt = "stabilityai/stable-diffusion-2-base"
        scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
        pipe = StableDiffusionPanoramaPipeline.from_pretrained(model_ckpt, scheduler=scheduler, safety_checker=None)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 2048, 3)

        expected_slice = np.array(
            [0.36968392, 0.27025372, 0.32446766, 0.28379387, 0.36363274, 0.30733347, 0.27100027, 0.27054125, 0.25536096]
        )

        assert np.abs(expected_slice - image_slice).max() < 1e-2

    def test_stable_diffusion_panorama_k_lms(self):
        pipe = StableDiffusionPanoramaPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base", safety_checker=None
        )
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 2048, 3)

        expected_slice = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])

        assert np.abs(expected_slice - image_slice).max() < 1e-3

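    # Note: `LMSDiscreteScheduler.from_config(pipe.scheduler.config)` in the
    # test above reuses the checkpoint's scheduler hyperparameters (betas,
    # timesteps) while swapping in the LMS sampling algorithm.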
    def test_stable_diffusion_panorama_intermediate_state(self):
        number_of_steps = 0

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 1:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 256)
                latents_slice = latents[0, -3:, -3:, -1]

                expected_slice = np.array(
                    [0.18681869, 0.33907816, 0.5361276, 0.14432865, -0.02856611, -0.73941123, 0.23397987, 0.47322682, -0.37823164]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
            elif step == 2:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 256)
                latents_slice = latents[0, -3:, -3:, -1]

                expected_slice = np.array(
                    [0.18539645, 0.33987248, 0.5378559, 0.14437142, -0.02455261, -0.7338317, 0.23990755, 0.47356272, -0.3786505]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        model_ckpt = "stabilityai/stable-diffusion-2-base"
        scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
        pipe = StableDiffusionPanoramaPipeline.from_pretrained(model_ckpt, scheduler=scheduler, safety_checker=None)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == 3

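    # In the test above, `callback_steps=1` makes the pipeline call
    # `callback_fn` once per denoising step, so `num_inference_steps=3` yields
    # exactly three invocations. The latents passed in have shape
    # (1, 4, 64, 256): the 512x2048 canvas divided by the VAE factor of 8.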
    def test_stable_diffusion_panorama_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        model_ckpt = "stabilityai/stable-diffusion-2-base"
        scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
        pipe = StableDiffusionPanoramaPipeline.from_pretrained(model_ckpt, scheduler=scheduler, safety_checker=None)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs()
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 5.5 GB is allocated
        assert mem_bytes < 5.5 * 10**9
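    # `enable_sequential_cpu_offload` keeps the weights in CPU RAM and moves
    # each submodule to the GPU only for its forward pass, which is what holds
    # peak GPU memory under the ~5.5 GB bound asserted above.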