Path: blob/main/tests/pipelines/stable_diffusion/test_cycle_diffusion.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import random
import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import AutoencoderKL, CycleDiffusionPipeline, DDIMScheduler, UNet2DConditionModel
from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class CycleDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = CycleDiffusionPipeline
    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {
        "negative_prompt",
        "height",
        "width",
        "negative_prompt_embeds",
    }
    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS.union({"source_prompt"})

    def get_dummy_components(self):
        # Tiny UNet, VAE, and CLIP text encoder keep the fast tests lightweight; seeding makes them deterministic.
        torch.manual_seed(0)
        unet = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            num_train_timesteps=1000,
            clip_sample=False,
            set_alpha_to_one=False,
        )
        torch.manual_seed(0)
        vae = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        torch.manual_seed(0)
        text_encoder_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
        )
        text_encoder = CLIPTextModel(text_encoder_config)
        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

        components = {
            "unet": unet,
            "scheduler": scheduler,
            "vae": vae,
            "text_encoder": text_encoder,
            "tokenizer": tokenizer,
            "safety_checker": None,
            "feature_extractor": None,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
        # "mps" does not support device-bound generators, so fall back to the default generator there.
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "prompt": "An astronaut riding an elephant",
            "source_prompt": "An astronaut riding a horse",
            "image": image,
            "generator": generator,
            "num_inference_steps": 2,
            "eta": 0.1,
            "strength": 0.8,
            "guidance_scale": 3,
            "source_guidance_scale": 1,
            "output_type": "numpy",
        }
        return inputs

    def test_stable_diffusion_cycle(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator

        components = self.get_dummy_components()
        pipe = CycleDiffusionPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        output = pipe(**inputs)
        images = output.images

        image_slice = images[0, -3:, -3:, -1]

        assert images.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.4459, 0.4943, 0.4544, 0.6643, 0.5474, 0.4327, 0.5701, 0.5959, 0.5179])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
    def test_stable_diffusion_cycle_fp16(self):
        components = self.get_dummy_components()
        for name, module in components.items():
            if hasattr(module, "half"):
                components[name] = module.half()
        pipe = CycleDiffusionPipeline(**components)
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(torch_device)
        output = pipe(**inputs)
        images = output.images

        image_slice = images[0, -3:, -3:, -1]

        assert images.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.3506, 0.4543, 0.446, 0.4575, 0.5195, 0.4155, 0.5273, 0.518, 0.4116])

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    @skip_mps
    def test_save_load_local(self):
        return super().test_save_load_local()

    @unittest.skip("non-deterministic pipeline")
    def test_inference_batch_single_identical(self):
        return super().test_inference_batch_single_identical()

    @skip_mps
    def test_dict_tuple_outputs_equivalent(self):
        return super().test_dict_tuple_outputs_equivalent()

    @skip_mps
    def test_save_load_optional_components(self):
        return super().test_save_load_optional_components()

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()


@slow
@require_torch_gpu
class CycleDiffusionPipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_cycle_diffusion_pipeline_fp16(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/cycle-diffusion/black_colored_car.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car_fp16.npy"
        )
        init_image = init_image.resize((512, 512))

        model_id = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
        pipe = CycleDiffusionPipeline.from_pretrained(
            model_id, scheduler=scheduler, safety_checker=None, torch_dtype=torch.float16, revision="fp16"
        )

        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        source_prompt = "A black colored car"
        prompt = "A blue colored car"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            source_prompt=source_prompt,
            image=init_image,
            num_inference_steps=100,
            eta=0.1,
            strength=0.85,
            guidance_scale=3,
            source_guidance_scale=1,
            generator=generator,
            output_type="np",
        )
        image = output.images

        # the values aren't exactly equal, but the images look the same visually
        assert np.abs(image - expected_image).max() < 5e-1

    def test_cycle_diffusion_pipeline(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/cycle-diffusion/black_colored_car.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car.npy"
        )
        init_image = init_image.resize((512, 512))

        model_id = "CompVis/stable-diffusion-v1-4"
        scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler")
        pipe = CycleDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, safety_checker=None)

        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        source_prompt = "A black colored car"
        prompt = "A blue colored car"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            source_prompt=source_prompt,
            image=init_image,
            num_inference_steps=100,
            eta=0.1,
            strength=0.85,
            guidance_scale=3,
            source_guidance_scale=1,
            generator=generator,
            output_type="np",
        )
        image = output.images

        assert np.abs(image - expected_image).max() < 1e-2