Path: blob/main/tests/pipelines/dit/test_dit.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import numpy as np
import torch

from diffusers import AutoencoderKL, DDIMScheduler, DiTPipeline, DPMSolverMultistepScheduler, Transformer2DModel
from diffusers.utils import is_xformers_available, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu

from ...pipeline_params import (
    CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS,
    CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS,
)
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = DiTPipeline
    params = CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS
    required_optional_params = PipelineTesterMixin.required_optional_params - {
        "latents",
        "num_images_per_prompt",
        "callback",
        "callback_steps",
    }
    batch_params = CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS
    test_cpu_offload = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        # Tiny DiT transformer configuration so the fast tests stay cheap enough to run on CPU.
        transformer = Transformer2DModel(
            sample_size=16,
            num_layers=2,
            patch_size=4,
            attention_head_dim=8,
            num_attention_heads=2,
            in_channels=4,
            out_channels=8,
            attention_bias=True,
            activation_fn="gelu-approximate",
            num_embeds_ada_norm=1000,
            norm_type="ada_norm_zero",
            norm_elementwise_affine=False,
        )
        vae = AutoencoderKL()
        scheduler = DDIMScheduler()
        components = {"transformer": transformer.eval(), "vae": vae.eval(), "scheduler": scheduler}
        return components

    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            # Device-placed generators are not supported on MPS, so seed the global RNG instead.
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "class_labels": [1],
            "generator": generator,
            "num_inference_steps": 2,
            "output_type": "numpy",
        }
        return inputs

    def test_inference(self):
        device = "cpu"

        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        self.assertEqual(image.shape, (1, 16, 16, 3))
        expected_slice = np.array([0.4380, 0.4141, 0.5159, 0.0000, 0.4282, 0.6680, 0.5485, 0.2545, 0.6719])
        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
        self.assertLessEqual(max_diff, 1e-3)

    def test_inference_batch_single_identical(self):
        self._test_inference_batch_single_identical(relax_max_difference=True, expected_max_diff=1e-3)

    @unittest.skipIf(
        torch_device != "cuda" or not is_xformers_available(),
        reason="XFormers attention is only available with CUDA and `xformers` installed",
    )
    def test_xformers_attention_forwardGenerator_pass(self):
        self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3)


@require_torch_gpu
@slow
class DiTPipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # Free GPU memory between the slow integration tests.
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_dit_256(self):
        generator = torch.manual_seed(0)

        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256")
        pipe.to("cuda")

        words = ["vase", "umbrella", "white shark", "white wolf"]
        ids = pipe.get_label_ids(words)

        images = pipe(ids, generator=generator, num_inference_steps=40, output_type="np").images

        for word, image in zip(words, images):
            # Compare each generated sample against a reference image stored on the Hub.
            expected_image = load_numpy(
                f"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/dit/{word}.npy"
            )
            assert np.abs((expected_image - image).max()) < 1e-2

    def test_dit_512(self):
        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
        # Run the 512x512 checkpoint with DPMSolverMultistepScheduler instead of the default scheduler.
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.to("cuda")

        words = ["vase", "umbrella"]
        ids = pipe.get_label_ids(words)

        generator = torch.manual_seed(0)
        images = pipe(ids, generator=generator, num_inference_steps=25, output_type="np").images

        for word, image in zip(words, images):
            expected_image = load_numpy(
                "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
                f"/dit/{word}_512.npy"
            )

            assert np.abs((expected_image - image).max()) < 1e-1