Path: blob/main/tests/pipelines/dance_diffusion/test_dance_diffusion.py
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import unittest

import numpy as np
import torch

from diffusers import DanceDiffusionPipeline, IPNDMScheduler, UNet1DModel
from diffusers.utils import slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS, UNCONDITIONAL_AUDIO_GENERATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class DanceDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = DanceDiffusionPipeline
    params = UNCONDITIONAL_AUDIO_GENERATION_PARAMS
    required_optional_params = PipelineTesterMixin.required_optional_params - {
        "callback",
        "latents",
        "callback_steps",
        "output_type",
        "num_images_per_prompt",
    }
    batch_params = UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS
    test_attention_slicing = False
    test_cpu_offload = False

    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet1DModel(
            block_out_channels=(32, 32, 64),
            extra_in_channels=16,
            sample_size=512,
            sample_rate=16_000,
            in_channels=2,
            out_channels=2,
            flip_sin_to_cos=True,
            use_timestep_embedding=False,
            time_embedding_type="fourier",
            mid_block_type="UNetMidBlock1D",
            down_block_types=("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D"),
            up_block_types=("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip"),
        )
        scheduler = IPNDMScheduler()

        components = {
            "unet": unet,
            "scheduler": scheduler,
        }
        return components

    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "batch_size": 1,
            "generator": generator,
            "num_inference_steps": 4,
        }
        return inputs

    def test_dance_diffusion(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = DanceDiffusionPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        output = pipe(**inputs)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, components["unet"].sample_size)
        expected_slice = np.array([-0.7265, 1.0000, -0.8388, 0.1175, 0.9498, -1.0000])
        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2

    @skip_mps
    def test_save_load_local(self):
        return super().test_save_load_local()

    @skip_mps
    def test_dict_tuple_outputs_equivalent(self):
        return super().test_dict_tuple_outputs_equivalent()

    @skip_mps
    def test_save_load_optional_components(self):
        return super().test_save_load_optional_components()

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()


@slow
@require_torch_gpu
class PipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_dance_diffusion(self):
        device = torch_device

        pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k")
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        output = pipe(generator=generator, num_inference_steps=100, audio_length_in_s=4.096)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, pipe.unet.sample_size)
        expected_slice = np.array([-0.0192, -0.0231, -0.0318, -0.0059, 0.0002, -0.0020])

        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2

    def test_dance_diffusion_fp16(self):
        device = torch_device

        pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k", torch_dtype=torch.float16)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        output = pipe(generator=generator, num_inference_steps=100, audio_length_in_s=4.096)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, pipe.unet.sample_size)
        expected_slice = np.array([-0.0367, -0.0488, -0.0771, -0.0525, -0.0444, -0.0341])

        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2
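
# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the original test file: it drives the same
# pipeline the integration tests exercise and writes the result to disk, which
# shows what `output.audios` actually contains. Assumes `scipy` is installed;
# the output filename is only illustrative.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from scipy.io import wavfile

    pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k")
    pipe = pipe.to(torch_device)

    generator = torch.manual_seed(0)
    output = pipe(generator=generator, num_inference_steps=100, audio_length_in_s=4.096)

    # `output.audios` is a float numpy array of shape (batch, channels, samples);
    # transpose the first sample to (samples, channels) for `wavfile.write`.
    audio = output.audios[0].T
    wavfile.write("dance_diffusion_sample.wav", pipe.unet.config.sample_rate, audio)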