# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
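
"""Tests for StableDiffusionDepth2ImgPipeline (Stable Diffusion 2 depth-to-image).

The fast tests below exercise the pipeline end to end with tiny, randomly
initialized dummy components; the @slow and @nightly suites load the full
`stabilityai/stable-diffusion-2-depth` checkpoint and compare generated images
against stored reference slices and arrays.
"""
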
import gc
import random
import tempfile
import unittest

import numpy as np
import torch
from PIL import Image
from transformers import (
    CLIPTextConfig,
    CLIPTextModel,
    CLIPTokenizer,
    DPTConfig,
    DPTFeatureExtractor,
    DPTForDepthEstimation,
)

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionDepth2ImgPipeline,
    UNet2DConditionModel,
)
from diffusers.utils import (
    floats_tensor,
    is_accelerate_available,
    is_accelerate_version,
    load_image,
    load_numpy,
    nightly,
    slow,
    torch_device,
)
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin
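
# TF32 matmuls trade precision for speed on recent CUDA GPUs; disable them so the
# hard-coded expected slices in these tests stay reproducible across devices.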
torch.backends.cuda.matmul.allow_tf32 = False


@skip_mps
class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionDepth2ImgPipeline
    test_save_load_optional_components = False
    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"}
    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=5,  # 4 latent channels + 1 channel for the concatenated depth map
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
            attention_head_dim=(2, 4),
            use_linear_projection=True,
        )
        scheduler = PNDMScheduler(skip_prk_steps=True)
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        # Config for a tiny hybrid DPT depth estimator (convolutional backbone + transformer head).
        backbone_config = {
            "global_padding": "same",
            "layer_type": "bottleneck",
            "depths": [3, 4, 9],
            "out_features": ["stage1", "stage2", "stage3"],
            "embedding_dynamic_padding": True,
            "hidden_sizes": [96, 192, 384, 768],
            "num_groups": 2,
        }
        depth_estimator_config = DPTConfig(
            image_size=32,
            patch_size=16,
            num_channels=3,
            hidden_size=32,
            num_hidden_layers=4,
            backbone_out_indices=(0, 1, 2, 3),
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            is_decoder=False,
            initializer_range=0.02,
            is_hybrid=True,
            backbone_config=backbone_config,
            backbone_featmap_shape=[1, 384, 24, 24],
        )
        depth_estimator = DPTForDepthEstimation(depth_estimator_config)
        feature_extractor = DPTFeatureExtractor.from_pretrained(
            "hf-internal-testing/tiny-random-DPTForDepthEstimation"
        )

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "depth_estimator": depth_estimator,
            "feature_extractor": feature_extractor,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed))
        image = image.cpu().permute(0, 2, 3, 1)[0]
        image = Image.fromarray(np.uint8(image)).convert("RGB").resize((32, 32))
        if str(device).startswith("mps"):
            # torch.Generator does not support the mps device; seed the global generator instead
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "image": image,
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }
        return inputs

    def test_save_load_local(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)[0]

        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            pipe_loaded = self.pipeline_class.from_pretrained(tmpdir)
            pipe_loaded.to(torch_device)
            pipe_loaded.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output_loaded = pipe_loaded(**inputs)[0]

        max_diff = np.abs(output - output_loaded).max()
        self.assertLess(max_diff, 1e-4)

    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
    def test_save_load_float16(self):
        components = self.get_dummy_components()
        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.to(torch_device).half()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)[0]

        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16)
            pipe_loaded.to(torch_device)
            pipe_loaded.set_progress_bar_config(disable=None)

        for name, component in pipe_loaded.components.items():
            if hasattr(component, "dtype"):
                self.assertTrue(
                    component.dtype == torch.float16,
                    f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.",
                )

        inputs = self.get_dummy_inputs(torch_device)
        output_loaded = pipe_loaded(**inputs)[0]

        max_diff = np.abs(output - output_loaded).max()
        self.assertLess(max_diff, 2e-2, "The output of the fp16 pipeline changed after saving and loading.")

    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
    def test_float16_inference(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.half()
        pipe_fp16 = self.pipeline_class(**components)
        pipe_fp16.to(torch_device)
        pipe_fp16.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(torch_device))[0]
        output_fp16 = pipe_fp16(**self.get_dummy_inputs(torch_device))[0]

        max_diff = np.abs(output - output_fp16).max()
        self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")

    @unittest.skipIf(
        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
    )
    def test_cpu_offload_forward_pass(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output_without_offload = pipe(**inputs)[0]

        pipe.enable_sequential_cpu_offload()
        inputs = self.get_dummy_inputs(torch_device)
        output_with_offload = pipe(**inputs)[0]

        max_diff = np.abs(output_with_offload - output_without_offload).max()
        self.assertLess(max_diff, 1e-4, "CPU offloading should not affect the inference results")

    def test_dict_tuple_outputs_equivalent(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(torch_device))[0]
        output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0]

        max_diff = np.abs(output - output_tuple).max()
        self.assertLess(max_diff, 1e-4)

    def test_progress_bar(self):
        super().test_progress_bar()

    def test_stable_diffusion_depth2img_default_case(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        if torch_device == "mps":
            expected_slice = np.array([0.6071, 0.5035, 0.4378, 0.5776, 0.5753, 0.4316, 0.4513, 0.5263, 0.4546])
        else:
            expected_slice = np.array([0.6312, 0.4984, 0.4154, 0.4788, 0.5535, 0.4599, 0.4017, 0.5359, 0.4716])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_depth2img_negative_prompt(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        negative_prompt = "french fries"
        output = pipe(**inputs, negative_prompt=negative_prompt)
        image = output.images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        if torch_device == "mps":
            expected_slice = np.array([0.5825, 0.5135, 0.4095, 0.5452, 0.6059, 0.4211, 0.3994, 0.5177, 0.4335])
        else:
            expected_slice = np.array([0.6296, 0.5125, 0.3890, 0.4456, 0.5955, 0.4621, 0.3810, 0.5310, 0.4626])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_depth2img_multiple_init_images(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        inputs["prompt"] = [inputs["prompt"]] * 2
        inputs["image"] = 2 * [inputs["image"]]
        image = pipe(**inputs).images
        image_slice = image[-1, -3:, -3:, -1]

        assert image.shape == (2, 32, 32, 3)

        if torch_device == "mps":
            expected_slice = np.array([0.6501, 0.5150, 0.4939, 0.6688, 0.5437, 0.5758, 0.5115, 0.4406, 0.4551])
        else:
            expected_slice = np.array([0.6267, 0.5232, 0.6001, 0.6738, 0.5029, 0.6429, 0.5364, 0.4159, 0.4674])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_depth2img_pil(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)

        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        if torch_device == "mps":
            expected_slice = np.array([0.53232, 0.47015, 0.40868, 0.45651, 0.4891, 0.4668, 0.4287, 0.48822, 0.47439])
        else:
            expected_slice = np.array([0.6312, 0.4984, 0.4154, 0.4788, 0.5535, 0.4599, 0.4017, 0.5359, 0.4716])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()


@slow
@require_torch_gpu
class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
        )
        inputs = {
            "prompt": "two tigers",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 3,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_depth2img_pipeline_default(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.9057, 0.9365, 0.9258, 0.8937, 0.8555, 0.8541, 0.8260, 0.7747, 0.7421])

        assert np.abs(expected_slice - image_slice).max() < 1e-4

    def test_stable_diffusion_depth2img_pipeline_k_lms(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.6363, 0.6274, 0.6309, 0.6370, 0.6226, 0.6286, 0.6213, 0.6453, 0.6306])

        assert np.abs(expected_slice - image_slice).max() < 1e-4

    def test_stable_diffusion_depth2img_pipeline_ddim(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.6424, 0.6524, 0.6249, 0.6041, 0.6634, 0.6420, 0.6522, 0.6555, 0.6436])

        assert np.abs(expected_slice - image_slice).max() < 1e-4

    def test_stable_diffusion_depth2img_intermediate_state(self):
        number_of_steps = 0

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 1:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 60, 80)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.7168, -1.5137, -0.1418, -2.9219, -2.7266, -2.4414, -2.1035, -3.0078, -1.7051]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
            elif step == 2:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 60, 80)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.7109, -1.5068, -0.1403, -2.9160, -2.7207, -2.4414, -2.1035, -3.0059, -1.7090]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(dtype=torch.float16)
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == 2

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs(dtype=torch.float16)
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.9 GB is allocated
        assert mem_bytes < 2.9 * 10**9


@nightly
@require_torch_gpu
class StableDiffusionDepth2ImgPipelineNightlyTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
        )
        inputs = {
            "prompt": "two tigers",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 3,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_depth2img_pndm(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_pndm.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_ddim(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_ddim.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_lms(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_lms.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_dpm(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        inputs["num_inference_steps"] = 30
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_dpm_multi.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3
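

# Note: the @slow and @nightly suites above are skipped by default. In a diffusers
# development checkout they are typically enabled via environment variables, e.g.:
#   RUN_SLOW=1 pytest tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py
#   RUN_NIGHTLY=1 pytest tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py
# Both suites also require a CUDA GPU (@require_torch_gpu).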