Path: blob/main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import random
import tempfile
import unittest

import numpy as np
import torch
from PIL import Image
from transformers import (
    CLIPTextConfig,
    CLIPTextModel,
    CLIPTokenizer,
    DPTConfig,
    DPTFeatureExtractor,
    DPTForDepthEstimation,
)

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionDepth2ImgPipeline,
    UNet2DConditionModel,
)
from diffusers.utils import (
    floats_tensor,
    is_accelerate_available,
    is_accelerate_version,
    load_image,
    load_numpy,
    nightly,
    slow,
    torch_device,
)
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


@skip_mps
class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionDepth2ImgPipeline
    test_save_load_optional_components = False
    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"}
    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=5,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
            attention_head_dim=(2, 4),
            use_linear_projection=True,
        )
        scheduler = PNDMScheduler(skip_prk_steps=True)
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        backbone_config = {
            "global_padding": "same",
            "layer_type": "bottleneck",
            "depths": [3, 4, 9],
            "out_features": ["stage1", "stage2", "stage3"],
            "embedding_dynamic_padding": True,
            "hidden_sizes": [96, 192, 384, 768],
            "num_groups": 2,
        }
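        # A tiny hybrid DPT estimator: ViT layers on top of the small BiT backbone configured above.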
        depth_estimator_config = DPTConfig(
            image_size=32,
            patch_size=16,
            num_channels=3,
            hidden_size=32,
            num_hidden_layers=4,
            backbone_out_indices=(0, 1, 2, 3),
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            is_decoder=False,
            initializer_range=0.02,
            is_hybrid=True,
            backbone_config=backbone_config,
            backbone_featmap_shape=[1, 384, 24, 24],
        )
        depth_estimator = DPTForDepthEstimation(depth_estimator_config)
        feature_extractor = DPTFeatureExtractor.from_pretrained(
            "hf-internal-testing/tiny-random-DPTForDepthEstimation"
        )

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "depth_estimator": depth_estimator,
            "feature_extractor": feature_extractor,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed))
        image = image.cpu().permute(0, 2, 3, 1)[0]
        image = Image.fromarray(np.uint8(image)).convert("RGB").resize((32, 32))
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "image": image,
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }
        return inputs

    def test_save_load_local(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)[0]

        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            pipe_loaded = self.pipeline_class.from_pretrained(tmpdir)
            pipe_loaded.to(torch_device)
            pipe_loaded.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output_loaded = pipe_loaded(**inputs)[0]

        max_diff = np.abs(output - output_loaded).max()
        self.assertLess(max_diff, 1e-4)

    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
    def test_save_load_float16(self):
        components = self.get_dummy_components()
        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.to(torch_device).half()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)[0]

        with tempfile.TemporaryDirectory() as tmpdir:
            pipe.save_pretrained(tmpdir)
            pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16)
            pipe_loaded.to(torch_device)
            pipe_loaded.set_progress_bar_config(disable=None)

        for name, component in pipe_loaded.components.items():
            if hasattr(component, "dtype"):
                self.assertTrue(
                    component.dtype == torch.float16,
                    f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.",
                )

        inputs = self.get_dummy_inputs(torch_device)
        output_loaded = pipe_loaded(**inputs)[0]

        max_diff = np.abs(output - output_loaded).max()
        self.assertLess(max_diff, 2e-2, "The output of the fp16 pipeline changed after saving and loading.")
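
    # Run the same components through the pipeline in fp32 and fp16 and check that the outputs stay close.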
    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
    def test_float16_inference(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.half()
        pipe_fp16 = self.pipeline_class(**components)
        pipe_fp16.to(torch_device)
        pipe_fp16.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(torch_device))[0]
        output_fp16 = pipe_fp16(**self.get_dummy_inputs(torch_device))[0]

        max_diff = np.abs(output - output_fp16).max()
        self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")

    @unittest.skipIf(
        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
    )
    def test_cpu_offload_forward_pass(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output_without_offload = pipe(**inputs)[0]

        pipe.enable_sequential_cpu_offload()
        inputs = self.get_dummy_inputs(torch_device)
        output_with_offload = pipe(**inputs)[0]

        max_diff = np.abs(output_with_offload - output_without_offload).max()
        self.assertLess(max_diff, 1e-4, "CPU offloading should not affect the inference results")

    def test_dict_tuple_outputs_equivalent(self):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(torch_device))[0]
        output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0]

        max_diff = np.abs(output - output_tuple).max()
        self.assertLess(max_diff, 1e-4)

    def test_progress_bar(self):
        super().test_progress_bar()

    def test_stable_diffusion_depth2img_default_case(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        if torch_device == "mps":
            expected_slice = np.array([0.6071, 0.5035, 0.4378, 0.5776, 0.5753, 0.4316, 0.4513, 0.5263, 0.4546])
        else:
            expected_slice = np.array([0.6312, 0.4984, 0.4154, 0.4788, 0.5535, 0.4599, 0.4017, 0.5359, 0.4716])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_depth2img_negative_prompt(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        negative_prompt = "french fries"
        output = pipe(**inputs, negative_prompt=negative_prompt)
        image = output.images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        if torch_device == "mps":
            expected_slice = np.array([0.5825, 0.5135, 0.4095, 0.5452, 0.6059, 0.4211, 0.3994, 0.5177, 0.4335])
        else:
            expected_slice = np.array([0.6296, 0.5125, 0.3890, 0.4456, 0.5955, 0.4621, 0.3810, 0.5310, 0.4626])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
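
    # Passing lists for both `prompt` and `image` should yield a batch of two images.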
    def test_stable_diffusion_depth2img_multiple_init_images(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        inputs["prompt"] = [inputs["prompt"]] * 2
        inputs["image"] = 2 * [inputs["image"]]
        image = pipe(**inputs).images
        image_slice = image[-1, -3:, -3:, -1]

        assert image.shape == (2, 32, 32, 3)

        if torch_device == "mps":
            expected_slice = np.array([0.6501, 0.5150, 0.4939, 0.6688, 0.5437, 0.5758, 0.5115, 0.4406, 0.4551])
        else:
            expected_slice = np.array([0.6267, 0.5232, 0.6001, 0.6738, 0.5029, 0.6429, 0.5364, 0.4159, 0.4674])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_depth2img_pil(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = StableDiffusionDepth2ImgPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)

        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        if torch_device == "mps":
            expected_slice = np.array([0.53232, 0.47015, 0.40868, 0.45651, 0.4891, 0.4668, 0.4287, 0.48822, 0.47439])
        else:
            expected_slice = np.array([0.6312, 0.4984, 0.4154, 0.4788, 0.5535, 0.4599, 0.4017, 0.5359, 0.4716])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()


@slow
@require_torch_gpu
class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
        )
        inputs = {
            "prompt": "two tigers",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 3,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_depth2img_pipeline_default(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.9057, 0.9365, 0.9258, 0.8937, 0.8555, 0.8541, 0.8260, 0.7747, 0.7421])

        assert np.abs(expected_slice - image_slice).max() < 1e-4

    def test_stable_diffusion_depth2img_pipeline_k_lms(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.6363, 0.6274, 0.6309, 0.6370, 0.6226, 0.6286, 0.6213, 0.6453, 0.6306])

        assert np.abs(expected_slice - image_slice).max() < 1e-4
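
    # Same checkpoint with the DDIM scheduler swapped in via `from_config`.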
    def test_stable_diffusion_depth2img_pipeline_ddim(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None
        )
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, 253:256, 253:256, -1].flatten()

        assert image.shape == (1, 480, 640, 3)
        expected_slice = np.array([0.6424, 0.6524, 0.6249, 0.6041, 0.6634, 0.6420, 0.6522, 0.6555, 0.6436])

        assert np.abs(expected_slice - image_slice).max() < 1e-4

    def test_stable_diffusion_depth2img_intermediate_state(self):
        number_of_steps = 0

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 1:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 60, 80)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.7168, -1.5137, -0.1418, -2.9219, -2.7266, -2.4414, -2.1035, -3.0078, -1.7051]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
            elif step == 2:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 60, 80)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.7109, -1.5068, -0.1403, -2.9160, -2.7207, -2.4414, -2.1035, -3.0059, -1.7090]
                )

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(dtype=torch.float16)
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == 2

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-depth", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs(dtype=torch.float16)
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.9 GB is allocated
        assert mem_bytes < 2.9 * 10**9


@nightly
@require_torch_gpu
class StableDiffusionDepth2ImgPipelineNightlyTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
        )
        inputs = {
            "prompt": "two tigers",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 3,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs
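
    # The nightly tests compare the full decoded image against reference arrays stored on the Hub.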
    def test_depth2img_pndm(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_pndm.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_ddim(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_ddim.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_lms(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_lms.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_depth2img_dpm(self):
        pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth")
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs()
        inputs["num_inference_steps"] = 30
        image = pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_depth2img/stable_diffusion_2_0_dpm_multi.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3
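

# A minimal sketch of how to run this module locally, assuming a diffusers dev
# checkout; the `@slow` and `@nightly` suites are gated behind the RUN_SLOW and
# RUN_NIGHTLY environment variables used by the diffusers test runner:
#
#   python -m pytest tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py -k FastTests
#   RUN_SLOW=1 python -m pytest tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py -k SlowTests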