GitHub Repository: shivamshrirao/diffusers
Path: blob/main/tests/pipelines/dit/test_dit.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import numpy as np
import torch

from diffusers import AutoencoderKL, DDIMScheduler, DiTPipeline, DPMSolverMultistepScheduler, Transformer2DModel
from diffusers.utils import is_xformers_available, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu

from ...pipeline_params import (
    CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS,
    CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS,
)
from ...test_pipelines_common import PipelineTesterMixin

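# Disable TF32 matmuls so the numerical comparisons below run in full float32 precision.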
torch.backends.cuda.matmul.allow_tf32 = False

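# Fast, CPU-friendly smoke tests for DiTPipeline built from tiny dummy components.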
class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = DiTPipeline
    params = CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS
    required_optional_params = PipelineTesterMixin.required_optional_params - {
        "latents",
        "num_images_per_prompt",
        "callback",
        "callback_steps",
    }
    batch_params = CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS
    test_cpu_offload = False

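    # Tiny DiT-style transformer plus default VAE and DDIM scheduler, seeded for determinism.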
    def get_dummy_components(self):
        torch.manual_seed(0)
        transformer = Transformer2DModel(
            sample_size=16,
            num_layers=2,
            patch_size=4,
            attention_head_dim=8,
            num_attention_heads=2,
            in_channels=4,
            out_channels=8,
            attention_bias=True,
            activation_fn="gelu-approximate",
            num_embeds_ada_norm=1000,
            norm_type="ada_norm_zero",
            norm_elementwise_affine=False,
        )
        vae = AutoencoderKL()
        scheduler = DDIMScheduler()
        components = {"transformer": transformer.eval(), "vae": vae.eval(), "scheduler": scheduler}
        return components

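    # Minimal call arguments: one class label, a seeded generator, and two denoising steps.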
    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "class_labels": [1],
            "generator": generator,
            "num_inference_steps": 2,
            "output_type": "numpy",
        }
        return inputs

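    # Smoke test on CPU: check the output shape and compare a corner slice of the
    # generated image against hard-coded reference values.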
    def test_inference(self):
        device = "cpu"

        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        self.assertEqual(image.shape, (1, 16, 16, 3))
        expected_slice = np.array([0.4380, 0.4141, 0.5159, 0.0000, 0.4282, 0.6680, 0.5485, 0.2545, 0.6719])
        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
        self.assertLessEqual(max_diff, 1e-3)

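    # Batched generation should match single-sample generation (checked by the common mixin).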
    def test_inference_batch_single_identical(self):
        self._test_inference_batch_single_identical(relax_max_difference=True, expected_max_diff=1e-3)

    @unittest.skipIf(
        torch_device != "cuda" or not is_xformers_available(),
        reason="XFormers attention is only available with CUDA and `xformers` installed",
    )
    def test_xformers_attention_forwardGenerator_pass(self):
        self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3)

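# Slow integration tests: load the pretrained DiT-XL/2 checkpoints on CUDA and
# compare generated images against reference arrays hosted on the Hugging Face Hub.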
@require_torch_gpu
@slow
class DiTPipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

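    # DiT-XL/2 at 256x256: generate four ImageNet classes and compare each image
    # to a stored reference, with a tolerance of 1e-2.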
    def test_dit_256(self):
        generator = torch.manual_seed(0)

        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256")
        pipe.to("cuda")

        words = ["vase", "umbrella", "white shark", "white wolf"]
        ids = pipe.get_label_ids(words)

        images = pipe(ids, generator=generator, num_inference_steps=40, output_type="np").images

        for word, image in zip(words, images):
            expected_image = load_numpy(
                f"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/dit/{word}.npy"
            )
            assert np.abs((expected_image - image).max()) < 1e-2

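    # DiT-XL/2 at 512x512 with DPMSolverMultistepScheduler swapped in; fewer steps
    # and a looser tolerance of 1e-1.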
    def test_dit_512(self):
        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.to("cuda")

        words = ["vase", "umbrella"]
        ids = pipe.get_label_ids(words)

        generator = torch.manual_seed(0)
        images = pipe(ids, generator=generator, num_inference_steps=25, output_type="np").images

        for word, image in zip(words, images):
            expected_image = load_numpy(
                "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
                f"/dit/{word}_512.npy"
            )

            assert np.abs((expected_image - image).max()) < 1e-1