# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest

import numpy as np
import torch

from diffusers import DanceDiffusionPipeline, IPNDMScheduler, UNet1DModel
from diffusers.utils import slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

from ...pipeline_params import UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS, UNCONDITIONAL_AUDIO_GENERATION_PARAMS
from ...test_pipelines_common import PipelineTesterMixin

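# Disable TF32 matmuls on Ampere+ GPUs so float32 results stay close to the
# reference slices asserted below.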
torch.backends.cuda.matmul.allow_tf32 = False


class DanceDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = DanceDiffusionPipeline
    params = UNCONDITIONAL_AUDIO_GENERATION_PARAMS
    # Optional call arguments that do not apply to this audio pipeline are dropped
    # from the shared optional-parameter checks.
    required_optional_params = PipelineTesterMixin.required_optional_params - {
        "callback",
        "latents",
        "callback_steps",
        "output_type",
        "num_images_per_prompt",
    }
    batch_params = UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS
    test_attention_slicing = False
    test_cpu_offload = False

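    # Build a deliberately small UNet1DModel and an IPNDMScheduler so the fast tests
    # run in seconds on CPU.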
    def get_dummy_components(self):
        torch.manual_seed(0)
        unet = UNet1DModel(
            block_out_channels=(32, 32, 64),
            extra_in_channels=16,
            sample_size=512,
            sample_rate=16_000,
            in_channels=2,
            out_channels=2,
            flip_sin_to_cos=True,
            use_timestep_embedding=False,
            time_embedding_type="fourier",
            mid_block_type="UNetMidBlock1D",
            down_block_types=("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D"),
            up_block_types=("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip"),
        )
        scheduler = IPNDMScheduler()

        components = {
            "unet": unet,
            "scheduler": scheduler,
        }
        return components

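    # MPS does not support device-specific torch.Generator objects, so the global
    # CPU seed is used there instead.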
    def get_dummy_inputs(self, device, seed=0):
        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
        else:
            generator = torch.Generator(device=device).manual_seed(seed)
        inputs = {
            "batch_size": 1,
            "generator": generator,
            "num_inference_steps": 4,
        }
        return inputs

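    # End-to-end smoke test on CPU: generate a short clip with the dummy components
    # and compare a slice of the waveform against precomputed reference values.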
    def test_dance_diffusion(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
        pipe = DanceDiffusionPipeline(**components)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs(device)
        output = pipe(**inputs)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, components["unet"].sample_size)
        expected_slice = np.array([-0.7265, 1.0000, -0.8388, 0.1175, 0.9498, -1.0000])
        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2

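    # The common PipelineTesterMixin checks below are overridden only so they can be
    # marked skip_mps; they are not run on Apple MPS.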
    @skip_mps
    def test_save_load_local(self):
        return super().test_save_load_local()

    @skip_mps
    def test_dict_tuple_outputs_equivalent(self):
        return super().test_dict_tuple_outputs_equivalent()

    @skip_mps
    def test_save_load_optional_components(self):
        return super().test_save_load_optional_components()

    @skip_mps
    def test_attention_slicing_forward_pass(self):
        return super().test_attention_slicing_forward_pass()


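# Integration tests: load the full harmonai/maestro-150k checkpoint from the Hub and
# run real sampling, so they are gated behind @slow and @require_torch_gpu.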
@slow
@require_torch_gpu
class PipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_dance_diffusion(self):
        device = torch_device

        pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k")
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        output = pipe(generator=generator, num_inference_steps=100, audio_length_in_s=4.096)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, pipe.unet.sample_size)
        expected_slice = np.array([-0.0192, -0.0231, -0.0318, -0.0059, 0.0002, -0.0020])

        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2

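    # Same generation as above, but with the checkpoint loaded in float16; the expected
    # waveform slice differs accordingly while the tolerance stays at 1e-2.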
    def test_dance_diffusion_fp16(self):
        device = torch_device

        pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k", torch_dtype=torch.float16)
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        output = pipe(generator=generator, num_inference_steps=100, audio_length_in_s=4.096)
        audio = output.audios

        audio_slice = audio[0, -3:, -3:]

        assert audio.shape == (1, 2, pipe.unet.sample_size)
        expected_slice = np.array([-0.0367, -0.0488, -0.0771, -0.0525, -0.0444, -0.0341])

        assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2