Path: blob/main/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import random
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import (
    AutoencoderKL,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    StableDiffusionImg2ImgPipeline,
    UNet2DConditionModel,
)
from diffusers.image_processor import VaeImageProcessor
from diffusers.utils import floats_tensor, load_image, load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionImg2ImgPipeline
    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"}
    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        scheduler = PNDMScheduler(skip_prk_steps=True)
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components

    def get_dummy_inputs(self, device, seed=0, input_image_type="pt", output_type="np"):
        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)

        if input_image_type == "pt":
            input_image = image
        elif input_image_type == "np":
            input_image = image.cpu().numpy().transpose(0, 2, 3, 1)
        elif input_image_type == "pil":
            input_image = image.cpu().numpy().transpose(0, 2, 3, 1)
            input_image = VaeImageProcessor.numpy_to_pil(input_image)
        else:
            raise ValueError(f"unsupported input_image_type {input_image_type}.")

        if output_type not in ["pt", "np", "pil"]:
            raise ValueError(f"unsupported output_type {output_type}")

        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "image": input_image,
            "generator": generator,
            "num_inference_steps": 2,
            "guidance_scale": 6.0,
            "output_type": output_type,
        }
        return inputs

    def test_stable_diffusion_img2img_default_case(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_img2img_negative_prompt(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        negative_prompt = "french fries"
        output = sd_pipe(**inputs, negative_prompt=negative_prompt)
        image = output.images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_img2img_multiple_init_images(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        inputs["prompt"] = [inputs["prompt"]] * 2
        inputs["image"] = inputs["image"].repeat(2, 1, 1, 1)
        image = sd_pipe(**inputs).images
        image_slice = image[-1, -3:, -3:, -1]

        assert image.shape == (2, 32, 32, 3)
        expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    def test_stable_diffusion_img2img_k_lms(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        components["scheduler"] = LMSDiscreteScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        )
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe.image_processor = VaeImageProcessor(vae_scale_factor=sd_pipe.vae_scale_factor, do_normalize=False)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = sd_pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3

    @skip_mps
    def test_save_load_local(self):
        return super().test_save_load_local()

    @skip_mps
    def test_dict_tuple_outputs_equivalent(self):
        return super().test_dict_tuple_outputs_equivalent()

    @skip_mps
    def test_save_load_optional_components(self):
        return super().test_save_load_optional_components()

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()

    @skip_mps
    def test_pt_np_pil_outputs_equivalent(self):
        device = "cpu"
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        output_pt = sd_pipe(**self.get_dummy_inputs(device, output_type="pt"))[0]
        output_np = sd_pipe(**self.get_dummy_inputs(device, output_type="np"))[0]
        output_pil = sd_pipe(**self.get_dummy_inputs(device, output_type="pil"))[0]

        assert np.abs(output_pt.cpu().numpy().transpose(0, 2, 3, 1) - output_np).max() <= 1e-4
        assert np.abs(np.array(output_pil[0]) - (output_np * 255).round()).max() <= 1e-4

    @skip_mps
    def test_image_types_consistent(self):
        device = "cpu"
        components = self.get_dummy_components()
        sd_pipe = StableDiffusionImg2ImgPipeline(**components)
        sd_pipe = sd_pipe.to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        output_pt = sd_pipe(**self.get_dummy_inputs(device, input_image_type="pt"))[0]
        output_np = sd_pipe(**self.get_dummy_inputs(device, input_image_type="np"))[0]
        output_pil = sd_pipe(**self.get_dummy_inputs(device, input_image_type="pil"))[0]

        assert np.abs(output_pt - output_np).max() <= 1e-4
        assert np.abs(output_pil - output_np).max() <= 1e-2


@slow
@require_torch_gpu
class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/sketch-mountains-input.png"
        )
        inputs = {
            "prompt": "a fantasy landscape, concept art, high resolution",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 3,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "np",
        }
        return inputs

    def test_stable_diffusion_img2img_default(self):
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 768, 3)
        expected_slice = np.array([0.4300, 0.4662, 0.4930, 0.3990, 0.4307, 0.4525, 0.3719, 0.4064, 0.3923])

        assert np.abs(expected_slice - image_slice).max() < 1e-3

    def test_stable_diffusion_img2img_k_lms(self):
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
        pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 768, 3)
        expected_slice = np.array([0.0389, 0.0346, 0.0415, 0.0290, 0.0218, 0.0210, 0.0408, 0.0567, 0.0271])

        assert np.abs(expected_slice - image_slice).max() < 1e-3

    def test_stable_diffusion_img2img_ddim(self):
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        assert image.shape == (1, 512, 768, 3)
        expected_slice = np.array([0.0593, 0.0607, 0.0851, 0.0582, 0.0636, 0.0721, 0.0751, 0.0981, 0.0781])

        assert np.abs(expected_slice - image_slice).max() < 1e-3

    def test_stable_diffusion_img2img_intermediate_state(self):
        number_of_steps = 0

        def callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 1:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array([-0.4958, 0.5107, 1.1045, 2.7539, 4.6680, 3.8320, 1.5049, 1.8633, 2.6523])

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
            elif step == 2:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 64, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array([-0.4956, 0.5078, 1.0918, 2.7520, 4.6484, 3.8125, 1.5146, 1.8633, 2.6367])

                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2

        callback_fn.has_been_called = False

        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        pipe(**inputs, callback=callback_fn, callback_steps=1)
        assert callback_fn.has_been_called
        assert number_of_steps == 2

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)
        _ = pipe(**inputs)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.2 GB is allocated
        assert mem_bytes < 2.2 * 10**9

    def test_stable_diffusion_pipeline_with_model_offloading(self):
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        inputs = self.get_inputs(torch_device, dtype=torch.float16)

        # Normal inference

        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "CompVis/stable-diffusion-v1-4",
            safety_checker=None,
            torch_dtype=torch.float16,
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe(**inputs)
        mem_bytes = torch.cuda.max_memory_allocated()

        # With model offloading

        # Reload but don't move to cuda
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "CompVis/stable-diffusion-v1-4",
            safety_checker=None,
            torch_dtype=torch.float16,
        )

        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipe.enable_model_cpu_offload()
        pipe.set_progress_bar_config(disable=None)
        _ = pipe(**inputs)
        mem_bytes_offloaded = torch.cuda.max_memory_allocated()

        assert mem_bytes_offloaded < mem_bytes
        for module in pipe.text_encoder, pipe.unet, pipe.vae:
            assert module.device == torch.device("cpu")

    def test_stable_diffusion_img2img_pipeline_multiple_of_8(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/img2img/sketch-mountains-input.jpg"
        )
        # resize to resolution that is divisible by 8 but not 16 or 32
        init_image = init_image.resize((760, 504))

        model_id = "CompVis/stable-diffusion-v1-4"
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            model_id,
            safety_checker=None,
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        prompt = "A fantasy landscape, trending on artstation"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            image=init_image,
            strength=0.75,
            guidance_scale=7.5,
            generator=generator,
            output_type="np",
        )
        image = output.images[0]

        image_slice = image[255:258, 383:386, -1]

        assert image.shape == (504, 760, 3)
        expected_slice = np.array([0.9393, 0.9500, 0.9399, 0.9438, 0.9458, 0.9400, 0.9455, 0.9414, 0.9423])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3


@nightly
@require_torch_gpu
class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
        generator = torch.Generator(device=generator_device).manual_seed(seed)
        init_image = load_image(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/sketch-mountains-input.png"
        )
        inputs = {
            "prompt": "a fantasy landscape, concept art, high resolution",
            "image": init_image,
            "generator": generator,
            "num_inference_steps": 50,
            "strength": 0.75,
            "guidance_scale": 7.5,
            "output_type": "np",
        }
        return inputs

    def test_img2img_pndm(self):
        sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
        sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/stable_diffusion_1_5_pndm.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_img2img_ddim(self):
        sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
        sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/stable_diffusion_1_5_ddim.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_img2img_lms(self):
        sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
        sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/stable_diffusion_1_5_lms.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3

    def test_img2img_dpm(self):
        sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
        sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config)
        sd_pipe.to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        inputs = self.get_inputs(torch_device)
        inputs["num_inference_steps"] = 30
        image = sd_pipe(**inputs).images[0]

        expected_image = load_numpy(
            "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
            "/stable_diffusion_img2img/stable_diffusion_1_5_dpm.npy"
        )
        max_diff = np.abs(expected_image - image).max()
        assert max_diff < 1e-3
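For orientation, a minimal usage sketch of the img2img call pattern the slow tests above exercise. It reuses the same public checkpoint, init-image URL, prompt, strength, and guidance_scale as get_inputs; the fp16/CUDA setup, the fixed seed, and the output filename are illustrative assumptions rather than anything these tests require.

import torch
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers.utils import load_image

# Load the pipeline in half precision (assumption: a CUDA device is available).
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16, safety_checker=None
).to("cuda")

# Same sketch image the slow tests download as their init image.
init_image = load_image(
    "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
    "/stable_diffusion_img2img/sketch-mountains-input.png"
)

# strength controls how far the sampler moves away from the init image;
# the seeded generator makes the run reproducible.
image = pipe(
    prompt="a fantasy landscape, concept art, high resolution",
    image=init_image,
    strength=0.75,
    guidance_scale=7.5,
    generator=torch.Generator(device="cuda").manual_seed(0),
).images[0]
image.save("fantasy_landscape.png")  # hypothetical output path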