GitHub Repository: shivamshrirao/diffusers
Path: blob/main/tests/pipelines/stable_diffusion/test_cycle_diffusion.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import random
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import AutoencoderKL, CycleDiffusionPipeline, DDIMScheduler, UNet2DConditionModel
from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False
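

# The fast tests below run the full CycleDiffusion pipeline end to end on tiny,
# randomly initialized components so they finish quickly on CPU; the integration
# tests at the bottom of the file exercise the real pretrained checkpoint on GPU.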
class CycleDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = CycleDiffusionPipeline
    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {
        "negative_prompt",
        "height",
        "width",
        "negative_prompt_embeds",
    }
    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS.union({"source_prompt"})
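
    # Every component below is created under a fixed torch.manual_seed(0), so the random
    # weights (and hence the expected output slices in the tests) are reproducible.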
    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            num_train_timesteps=1000,
            clip_sample=False,
            set_alpha_to_one=False,
        )
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components
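
    # On MPS a CPU-seeded global generator is used instead of a device-bound
    # torch.Generator, which keeps input creation reproducible on that backend.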
    def get_dummy_inputs(self, device, seed=0):
        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "An astronaut riding an elephant",
            "source_prompt": "An astronaut riding a horse",
            "image": image,
            "generator": generator,
            "num_inference_steps": 2,
            "eta": 0.1,
            "strength": 0.8,
            "guidance_scale": 3,
            "source_guidance_scale": 1,
            "output_type": "numpy",
        }
        return inputs
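
    # Runs two denoising steps on the dummy components and compares a 3x3 corner slice
    # of the generated image against a precomputed reference.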
    def test_stable_diffusion_cycle(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator

        components = self.get_dummy_components()
        pipe = CycleDiffusionPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        output = pipe(**inputs)
        images = output.images

        image_slice = images[0, -3:, -3:, -1]

        assert images.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.4459, 0.4943, 0.4544, 0.6643, 0.5474, 0.4327, 0.5701, 0.5959, 0.5179])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
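
    # Same check as above, but with every module cast to fp16, so it only runs on CUDA.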
    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
    def test_stable_diffusion_cycle_fp16(self):
        components = self.get_dummy_components()
        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.half()
        pipe = CycleDiffusionPipeline(**components)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)
        images = output.images

        image_slice = images[0, -3:, -3:, -1]

        assert images.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.3506, 0.4543, 0.446, 0.4575, 0.5195, 0.4155, 0.5273, 0.518, 0.4116])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
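
    # The overrides below simply re-dispatch to the shared PipelineTesterMixin tests so
    # that pipeline-specific skip decorators can be attached.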
    @skip_mps
    def test_save_load_local(self):
        return super().test_save_load_local()

    @unittest.skip("non-deterministic pipeline")
    def test_inference_batch_single_identical(self):
        return super().test_inference_batch_single_identical()

    @skip_mps
    def test_dict_tuple_outputs_equivalent(self):
        return super().test_dict_tuple_outputs_equivalent()

    @skip_mps
    def test_save_load_optional_components(self):
        return super().test_save_load_optional_components()

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()
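

# The integration tests download the CompVis/stable-diffusion-v1-4 checkpoint and the
# reference images from the Hugging Face Hub, so they only run when slow tests are
# enabled and a CUDA device is available.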
@slow
@require_torch_gpu
class CycleDiffusionPipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()
    def test_cycle_diffusion_pipeline_fp16(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/cycle-diffusion/black_colored_car.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car_fp16.npy"
        )
        init_image = init_image.resize((512, 512))

        model_id = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
        pipe = CycleDiffusionPipeline.from_pretrained(
            model_id, scheduler=scheduler, safety_checker=None, torch_dtype=torch.float16, revision="fp16"
        )

        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        source_prompt = "A black colored car"
        prompt = "A blue colored car"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            source_prompt=source_prompt,
            image=init_image,
            num_inference_steps=100,
            eta=0.1,
            strength=0.85,
            guidance_scale=3,
            source_guidance_scale=1,
            generator=generator,
            output_type="np",
        )
        image = output.images

        # the values aren't exactly equal, but the images look the same visually
        assert np.abs(image - expected_image).max() < 5e-1
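
    # Full-precision counterpart of the test above; without fp16 rounding the output can
    # be compared against the reference image with a much tighter tolerance.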
    def test_cycle_diffusion_pipeline(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/cycle-diffusion/black_colored_car.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car.npy"
        )
        init_image = init_image.resize((512, 512))

        model_id = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
        pipe = CycleDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, safety_checker=None)

        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        source_prompt = "A black colored car"
        prompt = "A blue colored car"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            source_prompt=source_prompt,
            image=init_image,
            num_inference_steps=100,
            eta=0.1,
            strength=0.85,
            guidance_scale=3,
            source_guidance_scale=1,
            generator=generator,
            output_type="np",
        )
        image = output.images

        assert np.abs(image - expected_image).max() < 1e-2
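

# Note: in this repository the @slow integration tests are typically enabled via the
# RUN_SLOW=1 environment variable, e.g.:
#   RUN_SLOW=1 python -m pytest tests/pipelines/stable_diffusion/test_cycle_diffusion.py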