GitHub Repository: shivamshrirao/diffusers
Path: blob/main/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py

# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionPipeline,
    UNet2DConditionModel,
    logging,
)
from diffusers.models.attention_processor import AttnProcessor
from diffusers.utils import load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu

from ...pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
from ...test_pipelines_common import PipelineTesterMixin

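# TF32 matmuls are disabled for reproducibility on Ampere+ GPUs; the
# hard-coded expected slices below were presumably generated with full
# float32 matmul precision.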
torch.backends.cuda.matmul.allow_tf32 = False


class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionPipeline
    params = TEXT_TO_IMAGE_PARAMS
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS

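    # Assemble a miniature SD2-style pipeline (tiny UNet/VAE/text encoder with
    # seeded random weights) so the fast tests run on CPU in seconds instead of
    # loading full pretrained checkpoints.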
    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
            # SD2-specific config below
            attention_head_dim=(2, 4),
            use_linear_projection=True,
        )
        scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
        )
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
            sample_size=128,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
            # SD2-specific config below
            hidden_act="gelu",
            projection_dim=512,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components

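    # MPS does not support device-specific torch.Generator instances, so fall
    # back to the global (CPU) RNG when running on Apple silicon.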
    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }
        return inputs

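    # The tests below each exercise one scheduler: run the tiny pipeline for
    # two inference steps on CPU, then compare the bottom-right 3x3 corner of
    # the last channel against hard-coded reference values (tolerance 1e-2).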
    def test_stable_diffusion_ddim(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.5649, 0.6022, 0.4804, 0.5270, 0.5585, 0.4643, 0.5159, 0.4963, 0.4793])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_pndm(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = PNDMScheduler(skip_prk_steps=True)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.5099, 0.5677, 0.4671, 0.5128, 0.5697, 0.4676, 0.5277, 0.4964, 0.4946])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_k_lms(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_k_euler_ancestral(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = EulerAncestralDiscreteScheduler.from_config(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4715, 0.5376, 0.4569, 0.5224, 0.5734, 0.4797, 0.5465, 0.5074, 0.5046])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_k_euler(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = EulerDiscreteScheduler.from_config(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        expected_slice = np.array([0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

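    # _encode_prompt truncates prompts longer than the tokenizer's 77-token
    # context window and logs the clipped text; the CaptureLogger checks below
    # assert on that warning.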
    def test_stable_diffusion_long_prompt(self):
        components = self.get_dummy_components()
        components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        do_classifier_free_guidance = True
        negative_prompt = None
        num_images_per_prompt = 1
        logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")

        prompt = 25 * "@"
        with CaptureLogger(logger) as cap_logger_3:
            text_embeddings_3 = sd_pipe._encode_prompt(
                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
            )

        prompt = 100 * "@"
        with CaptureLogger(logger) as cap_logger:
            text_embeddings = sd_pipe._encode_prompt(
                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
            )

        negative_prompt = "Hello"
        with CaptureLogger(logger) as cap_logger_2:
            text_embeddings_2 = sd_pipe._encode_prompt(
                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
            )

        assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape
        assert text_embeddings.shape[1] == 77

        assert cap_logger.out == cap_logger_2.out
        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
        assert cap_logger.out.count("@") == 25
        assert cap_logger_3.out == ""


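# These tests load the real stabilityai/stable-diffusion-2-base checkpoint, so
# they need a CUDA GPU and only run when slow tests are enabled (diffusers'
# @slow marker gates them behind the RUN_SLOW environment variable).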
@slow
@require_torch_gpu
class StableDiffusion2PipelineSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

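    # Passing pre-made latents (seeded via NumPy) pins the starting noise, so
    # the short 3-step runs are reproducible regardless of the generator device.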
    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
        inputs = {
            "prompt": "a photograph of an astronaut riding a horse",
            "latents": latents,
            "generator": generator,
            "num_inference_steps": 3,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_default_ddim(self):
        pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base")
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506])
        assert np.abs(image_slice - expected_slice).max() < 1e-4

    def test_stable_diffusion_pndm(self):
        pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base")
        pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506])
        assert np.abs(image_slice - expected_slice).max() < 1e-4

    def test_stable_diffusion_k_lms(self):
        pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base")
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array([0.10440, 0.13115, 0.11100, 0.10141, 0.11440, 0.07215, 0.11332, 0.09693, 0.10006])
        assert np.abs(image_slice - expected_slice).max() < 1e-4

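    # Attention slicing computes attention in smaller chunks to reduce peak
    # VRAM; the test verifies memory drops below ~3.3 GB with slicing enabled
    # while the output stays numerically close to the unsliced run.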
    def test_stable_diffusion_attention_slicing(self):
        torch.cuda.reset_peak_memory_stats()
        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        # enable attention slicing
        pipe.enable_attention_slicing()
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        image_sliced = pipe(**inputs).images

        mem_bytes = torch.cuda.max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()
        # make sure that less than 3.3 GB is allocated
        assert mem_bytes < 3.3 * 10**9

        # disable slicing
        pipe.disable_attention_slicing()
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        image = pipe(**inputs).images

        # make sure that more than 3.3 GB is allocated
        mem_bytes = torch.cuda.max_memory_allocated()
        assert mem_bytes > 3.3 * 10**9
        assert np.abs(image_sliced - image).max() < 1e-3

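    # The per-step callback receives the raw latents, letting the test assert
    # on intermediate denoising states and count how many times it fires.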
    def test_stable_diffusion_text2img_intermediate_state(self):
        number_of_steps = 0

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 1:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 64)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.3862, -0.4507, -1.1729, 0.0686, -1.1045, 0.7124, -1.8301, 0.1903, 1.2773]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
            elif step == 2:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 64)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [0.2720, -0.1863, -0.7383, -0.5029, -0.7534, 0.3970, -0.7646, 0.4468, 1.2686]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == inputs["num_inference_steps"]

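    # Sequential CPU offload keeps weights on the CPU and moves each submodule
    # to the GPU only while it executes, minimizing peak VRAM at the cost of
    # extra host-device transfers.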
    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.8 GB is allocated
        assert mem_bytes < 2.8 * 10**9

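    # Model-level offload moves whole components (text encoder, UNet, VAE) to
    # the GPU one at a time; coarser than sequential offload, so it uses more
    # memory but incurs less transfer overhead.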
    def test_stable_diffusion_pipeline_with_model_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)

        # Normal inference

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base",
            torch_dtype=torch.float16,
        )
        pipe.unet.set_attn_processor(AttnProcessor())
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        outputs = pipe(**inputs)
        mem_bytes = torch.cuda.max_memory_allocated()

        # With model offloading

        # Reload but don't move to cuda
        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-base",
            torch_dtype=torch.float16,
        )
        pipe.unet.set_attn_processor(AttnProcessor())

        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe.enable_model_cpu_offload()
        pipe.set_progress_bar_config(disable=None)
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        outputs_offloaded = pipe(**inputs)
        mem_bytes_offloaded = torch.cuda.max_memory_allocated()

        assert np.abs(outputs.images - outputs_offloaded.images).max() < 1e-3
        assert mem_bytes_offloaded < mem_bytes
        assert mem_bytes_offloaded < 3 * 10**9
        for module in pipe.text_encoder, pipe.unet, pipe.vae:
            assert module.device == torch.device("cpu")

        # With attention slicing
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe.enable_attention_slicing()
        _ = pipe(**inputs)
        mem_bytes_slicing = torch.cuda.max_memory_allocated()
        assert mem_bytes_slicing < mem_bytes_offloaded


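# Nightly tests run full 50-step generations (25 for DPM-Solver) and diff the
# entire 512x512 output against reference images hosted in the
# diffusers/test-arrays dataset.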
@nightly
@require_torch_gpu
class StableDiffusion2PipelineNightlyTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
        inputs = {
            "prompt": "a photograph of an astronaut riding a horse",
            "latents": latents,
            "generator": generator,
            "num_inference_steps": 50,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_2_0_default_ddim(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base").to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_0_base_ddim.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_2_1_default_pndm(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_1_base_pndm.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_ddim(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device)
        sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_1_base_ddim.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_lms(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device)
        sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_1_base_lms.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_euler(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device)
        sd_pipe.scheduler = EulerDiscreteScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_1_base_euler.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_dpm(self):
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device)
        sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        inputs["num_inference_steps"] = 25
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_2_text2img/stable_diffusion_2_1_base_dpm_multi.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3