CoCalc -- test_stable

GitHub Repository: shivamshrirao/diffusers
Path: blob/main/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
¹⁴⁵¹ views
1
# coding=utf-8
2
# Copyright 2023 HuggingFace Inc.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
import gc
17
import unittest
18

19
import numpy as np
20
import torch
21
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
22

23
from diffusers import (
24
    AutoencoderKL,
25
    DDIMScheduler,
26
    DPMSolverMultistepScheduler,
27
    EulerAncestralDiscreteScheduler,
28
    EulerDiscreteScheduler,
29
    LMSDiscreteScheduler,
30
    PNDMScheduler,
31
    StableDiffusionPipeline,
32
    UNet2DConditionModel,
33
    logging,
34
)
35
from diffusers.models.attention_processor import AttnProcessor
36
from diffusers.utils import load_numpy, nightly, slow, torch_device
37
from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu
38

39
from ...pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
40
from ...test_pipelines_common import PipelineTesterMixin
41

42

43
# Turn off TF32 matmuls on CUDA for the whole module. Presumably this keeps
# the hard-coded expected values in the tests below comparable against
# full-precision float32 results -- TODO confirm with the test owners.
torch.backends.cuda.matmul.allow_tf32 = False
44

45

46
class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    """Tests of ``StableDiffusionPipeline`` built from tiny SD2-style components.

    The components are created in-process by ``get_dummy_components`` (only the
    tokenizer is fetched with ``from_pretrained``); the scheduler tests run the
    pipeline on CPU and compare a 3x3 corner of the output image against
    hard-coded reference values.
    """

    pipeline_class = StableDiffusionPipeline
    params = TEXT_TO_IMAGE_PARAMS
    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS

    def get_dummy_components(self):
        """Return a dict of small pipeline components.

        ``torch.manual_seed(0)`` is called before the UNet, the VAE and the
        text encoder are constructed; the order of these calls is kept as-is
        because the reference values in the tests depend on the resulting
        weights.
        """
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
            # SD2-specific config below
            attention_head_dim=(2, 4),
            use_linear_projection=True,
        )
        scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
        )
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
            sample_size=128,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
            # SD2-specific config below
            hidden_act="gelu",
            projection_dim=512,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        return {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }

    def get_dummy_inputs(self, device, seed=0):
        """Return pipeline call kwargs with a generator seeded with ``seed``.

        For devices whose string form starts with ``"mps"`` the global
        generator returned by ``torch.manual_seed`` is used; otherwise a
        ``torch.Generator`` bound to ``device`` is created.
        """
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        return {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": "numpy",
        }

    def _check_dummy_pipeline_output(self, expected_slice, make_scheduler=None):
        """Run the dummy pipeline on CPU and compare a corner of the image.

        Args:
            expected_slice: the nine reference values for the flattened
                ``image[0, -3:, -3:, -1]`` slice.
            make_scheduler: optional callable that receives the config of the
                default dummy scheduler and returns the scheduler to use. When
                ``None`` the scheduler from ``get_dummy_components`` is kept.
        """
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        if make_scheduler is not None:
            components["scheduler"] = make_scheduler(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)
        assert np.abs(image_slice.flatten() - np.array(expected_slice)).max() < 1e-2

    def test_stable_diffusion_ddim(self):
        """Default (DDIM) scheduler."""
        self._check_dummy_pipeline_output(
            [0.5649, 0.6022, 0.4804, 0.5270, 0.5585, 0.4643, 0.5159, 0.4963, 0.4793]
        )

    def test_stable_diffusion_pndm(self):
        """PNDM scheduler, built directly rather than from the DDIM config."""
        self._check_dummy_pipeline_output(
            [0.5099, 0.5677, 0.4671, 0.5128, 0.5697, 0.4676, 0.5277, 0.4964, 0.4946],
            make_scheduler=lambda _config: PNDMScheduler(skip_prk_steps=True),
        )

    def test_stable_diffusion_k_lms(self):
        """LMS discrete scheduler derived from the default scheduler config."""
        self._check_dummy_pipeline_output(
            [0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043],
            make_scheduler=LMSDiscreteScheduler.from_config,
        )

    def test_stable_diffusion_k_euler_ancestral(self):
        """Euler-ancestral scheduler derived from the default scheduler config."""
        self._check_dummy_pipeline_output(
            [0.4715, 0.5376, 0.4569, 0.5224, 0.5734, 0.4797, 0.5465, 0.5074, 0.5046],
            make_scheduler=EulerAncestralDiscreteScheduler.from_config,
        )

    def test_stable_diffusion_k_euler(self):
        """Euler scheduler derived from the default scheduler config."""
        self._check_dummy_pipeline_output(
            [0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043],
            make_scheduler=EulerDiscreteScheduler.from_config,
        )

    def test_stable_diffusion_long_prompt(self):
        """Check embedding shape and logged output for short and over-long prompts."""
        components = self.get_dummy_components()
        components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config)
        sd_pipe = StableDiffusionPipeline(**components)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        do_classifier_free_guidance = True
        num_images_per_prompt = 1
        logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")

        def encode(prompt, negative_prompt):
            # Returns the embeddings plus whatever the pipeline logged while encoding.
            with CaptureLogger(logger) as captured:
                embeddings = sd_pipe._encode_prompt(
                    prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
                )
            return embeddings, captured.out

        # Same call order as before: short prompt first, then the long prompt
        # without and with a negative prompt.
        short_embeddings, short_log = encode(25 * "@", None)
        long_prompt = 100 * "@"
        long_embeddings, long_log = encode(long_prompt, None)
        long_neg_embeddings, long_neg_log = encode(long_prompt, "Hello")

        assert short_embeddings.shape == long_neg_embeddings.shape == long_embeddings.shape
        assert long_embeddings.shape[1] == 77

        assert long_log == long_neg_log
        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
        assert long_log.count("@") == 25
        assert short_log == ""
247

248

249
@slow
@require_torch_gpu
class StableDiffusion2PipelineSlowTests(unittest.TestCase):
    """GPU tests that run the ``stabilityai/stable-diffusion-2-base`` checkpoint.

    Covers output slices for several schedulers, intermediate latents seen by
    the step callback, and peak CUDA memory with attention slicing and the two
    CPU-offloading modes.
    """

    _MODEL_ID = "stabilityai/stable-diffusion-2-base"

    def tearDown(self):
        """Collect garbage and release cached CUDA memory after every test."""
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        """Return pipeline call kwargs with fixed latents and a seeded generator.

        The latents come from ``np.random.RandomState(seed)`` and are moved to
        ``device`` with the requested ``dtype``; the generator is created on
        ``generator_device``.
        """
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
        return {
            "prompt": "a photograph of an astronaut riding a horse",
            "latents": latents,
            "generator": generator,
            "num_inference_steps": 3,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }

    @staticmethod
    def _reset_cuda_memory_stats():
        """Free cached CUDA blocks and reset the peak-memory counters.

        ``torch.cuda.reset_max_memory_allocated()`` is intentionally not called:
        it is deprecated and only forwards to ``reset_peak_memory_stats()``.
        """
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

    def _load_fp16_pipeline(self):
        """Load the checkpoint in float16 without moving it to any device."""
        return StableDiffusionPipeline.from_pretrained(self._MODEL_ID, torch_dtype=torch.float16)

    def _check_fp32_output_slice(self, expected_slice, scheduler_cls=None):
        """Run the default-precision pipeline and compare a 3x3 image corner.

        Args:
            expected_slice: nine reference values for the flattened
                ``image[0, -3:, -3:, -1]`` slice.
            scheduler_cls: optional scheduler class; when given, the pipeline's
                scheduler is replaced via ``scheduler_cls.from_config`` before
                the pipeline is moved to ``torch_device``.
        """
        pipe = StableDiffusionPipeline.from_pretrained(self._MODEL_ID)
        if scheduler_cls is not None:
            pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 512, 3)
        assert np.abs(image_slice - np.array(expected_slice)).max() < 1e-4

    def test_stable_diffusion_default_ddim(self):
        """Scheduler shipped with the checkpoint."""
        self._check_fp32_output_slice(
            [0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506]
        )

    def test_stable_diffusion_pndm(self):
        """PNDM scheduler built from the checkpoint's scheduler config."""
        self._check_fp32_output_slice(
            [0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506],
            scheduler_cls=PNDMScheduler,
        )

    def test_stable_diffusion_k_lms(self):
        """LMS discrete scheduler built from the checkpoint's scheduler config."""
        self._check_fp32_output_slice(
            [0.10440, 0.13115, 0.11100, 0.10141, 0.11440, 0.07215, 0.11332, 0.09693, 0.10006],
            scheduler_cls=LMSDiscreteScheduler,
        )

    def test_stable_diffusion_attention_slicing(self):
        """Attention slicing must lower peak memory without changing the image."""
        torch.cuda.reset_peak_memory_stats()
        pipe = self._load_fp16_pipeline()
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        # enable attention slicing
        pipe.enable_attention_slicing()
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        image_sliced = pipe(**inputs).images

        mem_bytes = torch.cuda.max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()
        # make sure that less than 3.3 GB is allocated
        assert mem_bytes < 3.3 * 10**9

        # disable slicing
        pipe.disable_attention_slicing()
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        image = pipe(**inputs).images

        # make sure that more than 3.3 GB is allocated
        mem_bytes = torch.cuda.max_memory_allocated()
        assert mem_bytes > 3.3 * 10**9
        assert np.abs(image_sliced - image).max() < 1e-3

    def test_stable_diffusion_text2img_intermediate_state(self):
        """The step callback runs once per step and sees the expected latents."""
        number_of_steps = 0

        # Reference 3x3 latent corners, keyed by the callback's ``step`` value.
        expected_by_step = {
            1: np.array([-0.3862, -0.4507, -1.1729, 0.0686, -1.1045, 0.7124, -1.8301, 0.1903, 1.2773]),
            2: np.array([0.2720, -0.1863, -0.7383, -0.5029, -0.7534, 0.3970, -0.7646, 0.4468, 1.2686]),
        }

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            nonlocal number_of_steps
            callback_fn.has_been_called = True
            number_of_steps += 1

            expected_slice = expected_by_step.get(step)
            if expected_slice is None:
                # Only steps 1 and 2 have reference values; others are just counted.
                return

            latents = latents.detach().cpu().numpy()
            assert latents.shape == (1, 4, 64, 64)
            latents_slice = latents[0, -3:, -3:, -1]
            assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        pipe = self._load_fp16_pipeline()
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == inputs["num_inference_steps"]

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        """Sequential CPU offloading keeps peak CUDA memory under 2.8 GB."""
        self._reset_cuda_memory_stats()

        pipe = self._load_fp16_pipeline()
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.8 GB is allocated
        assert mem_bytes < 2.8 * 10**9

    def test_stable_diffusion_pipeline_with_model_offloading(self):
        """Model offloading matches normal inference while using less memory."""
        self._reset_cuda_memory_stats()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)

        # Normal inference
        pipe = self._load_fp16_pipeline()
        pipe.unet.set_attn_processor(AttnProcessor())
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        outputs = pipe(**inputs)
        mem_bytes = torch.cuda.max_memory_allocated()

        # With model offloading

        # Reload but don't move to cuda. The ``pipe`` name is rebound on purpose
        # so the first pipeline is no longer referenced before the cache is
        # emptied and the peak statistics are reset.
        pipe = self._load_fp16_pipeline()
        pipe.unet.set_attn_processor(AttnProcessor())

        self._reset_cuda_memory_stats()

        pipe.enable_model_cpu_offload()
        pipe.set_progress_bar_config(disable=None)
        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        outputs_offloaded = pipe(**inputs)
        mem_bytes_offloaded = torch.cuda.max_memory_allocated()

        assert np.abs(outputs.images - outputs_offloaded.images).max() < 1e-3
        assert mem_bytes_offloaded < mem_bytes
        assert mem_bytes_offloaded < 3 * 10**9
        for module in pipe.text_encoder, pipe.unet, pipe.vae:
            assert module.device == torch.device("cpu")

        # With attention slicing
        self._reset_cuda_memory_stats()

        pipe.enable_attention_slicing()
        _ = pipe(**inputs)
        mem_bytes_slicing = torch.cuda.max_memory_allocated()
        assert mem_bytes_slicing < mem_bytes_offloaded
453

454

455
@nightly
@require_torch_gpu
class StableDiffusion2PipelineNightlyTests(unittest.TestCase):
    """GPU tests comparing full images against stored reference arrays.

    Each test loads a Stable Diffusion 2 base checkpoint, optionally swaps the
    scheduler, generates one image and compares it with a ``.npy`` reference
    downloaded via ``load_numpy``.
    """

    # Common prefix of every reference-array URL used by this class.
    _REFERENCE_URL_PREFIX = (
        "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
        "/stable_diffusion_2_text2img/"
    )

    def tearDown(self):
        """Collect garbage and release cached CUDA memory after every test."""
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        """Return pipeline call kwargs with fixed latents and a seeded generator.

        The latents come from ``np.random.RandomState(seed)`` and are moved to
        ``device`` with the requested ``dtype``; the generator is created on
        ``generator_device``.
        """
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
        return {
            "prompt": "a photograph of an astronaut riding a horse",
            "latents": latents,
            "generator": generator,
            "num_inference_steps": 50,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }

    def _assert_matches_reference(self, model_id, reference_file, scheduler_cls=None, num_inference_steps=None):
        """Generate one image and compare it with a stored reference array.

        Args:
            model_id: checkpoint name passed to ``from_pretrained``.
            reference_file: file name of the reference ``.npy`` array, appended
                to ``_REFERENCE_URL_PREFIX``.
            scheduler_cls: optional scheduler class; when given, the pipeline's
                scheduler is replaced via ``scheduler_cls.from_config``.
            num_inference_steps: optional override for the step count set by
                ``get_inputs``.
        """
        sd_pipe = StableDiffusionPipeline.from_pretrained(model_id).to(torch_device)
        if scheduler_cls is not None:
            sd_pipe.scheduler = scheduler_cls.from_config(sd_pipe.scheduler.config)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        if num_inference_steps is not None:
            inputs["num_inference_steps"] = num_inference_steps
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(self._REFERENCE_URL_PREFIX + reference_file)
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_stable_diffusion_2_0_default_ddim(self):
        """SD 2.0 base with the scheduler shipped in the checkpoint."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-base",
            "stable_diffusion_2_0_base_ddim.npy",
        )

    def test_stable_diffusion_2_1_default_pndm(self):
        """SD 2.1 base with the scheduler shipped in the checkpoint."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-1-base",
            "stable_diffusion_2_1_base_pndm.npy",
        )

    def test_stable_diffusion_ddim(self):
        """SD 2.1 base with a DDIM scheduler."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-1-base",
            "stable_diffusion_2_1_base_ddim.npy",
            scheduler_cls=DDIMScheduler,
        )

    def test_stable_diffusion_lms(self):
        """SD 2.1 base with an LMS discrete scheduler."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-1-base",
            "stable_diffusion_2_1_base_lms.npy",
            scheduler_cls=LMSDiscreteScheduler,
        )

    def test_stable_diffusion_euler(self):
        """SD 2.1 base with an Euler discrete scheduler."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-1-base",
            "stable_diffusion_2_1_base_euler.npy",
            scheduler_cls=EulerDiscreteScheduler,
        )

    def test_stable_diffusion_dpm(self):
        """SD 2.1 base with a DPM-Solver multistep scheduler and 25 steps."""
        self._assert_matches_reference(
            "stabilityai/stable-diffusion-2-1-base",
            "stable_diffusion_2_1_base_dpm_multi.npy",
            scheduler_cls=DPMSolverMultistepScheduler,
            num_inference_steps=25,
        )
565

566
Product

Resources

Company