Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
TensorSpeech
GitHub Repository: TensorSpeech/TensorFlowTTS
Path: blob/master/tensorflow_tts/utils/group_conv.py
1558 views
1
# -*- coding: utf-8 -*-
2
# This code is copy from https://github.com/tensorflow/tensorflow/pull/36773.
3
"""Group Convolution Modules."""
4
5
from tensorflow.python.framework import tensor_shape
6
from tensorflow.python.keras import activations, constraints, initializers, regularizers
7
from tensorflow.python.keras.engine.base_layer import Layer
8
from tensorflow.python.keras.engine.input_spec import InputSpec
9
from tensorflow.python.keras.layers import Conv1D, SeparableConv1D
10
from tensorflow.python.keras.utils import conv_utils
11
from tensorflow.python.ops import array_ops, nn, nn_ops
12
13
14
class Convolution(object):
    """Helper class for convolution.

    Note that this class assumes that shapes of input and filter passed to
    __call__ are compatible with input_shape and filter_shape passed to the
    constructor.

    Arguments
      input_shape: static shape of input. i.e. input.get_shape().
      filter_shape: static shape of the filter. i.e. filter.get_shape().
      padding: see convolution.
      strides: see convolution.
      dilation_rate: see convolution.
      name: see convolution.
      data_format: see convolution.
    """

    def __init__(
        self,
        input_shape,
        filter_shape,
        padding,
        strides=None,
        dilation_rate=None,
        name=None,
        data_format=None,
    ):
        """Helper function for convolution."""
        # Derive the total rank (spatial dims + batch + channel) from whichever
        # of the two static shapes has a known rank.
        num_total_dims = filter_shape.ndims
        if num_total_dims is None:
            num_total_dims = input_shape.ndims
            if num_total_dims is None:
                raise ValueError("rank of input or filter must be known")

        num_spatial_dims = num_total_dims - 2

        # Both shapes must agree on the full rank; with_rank raises on mismatch.
        try:
            input_shape.with_rank(num_spatial_dims + 2)
        except ValueError:
            raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))

        try:
            filter_shape.with_rank(num_spatial_dims + 2)
        except ValueError:
            raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))

        # Locate the channel dimension and the spatial axes depending on
        # data_format ("NC..." means channels-first; anything else, including
        # None, is treated as channels-last).
        if data_format is None or not data_format.startswith("NC"):
            input_channels_dim = tensor_shape.dimension_at_index(
                input_shape, num_spatial_dims + 1
            )
            spatial_dims = range(1, num_spatial_dims + 1)
        else:
            input_channels_dim = tensor_shape.dimension_at_index(input_shape, 1)
            spatial_dims = range(2, num_spatial_dims + 2)

        # Grouped-convolution compatibility check: the filter's in-channel
        # dimension must evenly divide the input channel count (it equals
        # input_channels / groups rather than input_channels).
        filter_dim = tensor_shape.dimension_at_index(filter_shape, num_spatial_dims)
        if not (input_channels_dim % filter_dim).is_compatible_with(0):
            raise ValueError(
                "number of input channels is not divisible by corresponding "
                "dimension of filter, {} % {} != 0".format(
                    input_channels_dim, filter_dim
                )
            )

        # NOTE: private TF API — normalizes strides/dilation_rate to per-axis
        # tuples and validates that they are not both != 1.
        strides, dilation_rate = nn_ops._get_strides_and_dilation_rate(
            num_spatial_dims, strides, dilation_rate
        )

        self.input_shape = input_shape
        self.filter_shape = filter_shape
        self.data_format = data_format
        self.strides = strides
        self.padding = padding
        self.name = name
        self.dilation_rate = dilation_rate
        # Dilation is implemented by wrapping a non-dilated convolution in a
        # space-to-batch / batch-to-space transform (private TF API).
        self.conv_op = nn_ops._WithSpaceToBatch(
            input_shape,
            dilation_rate=dilation_rate,
            padding=padding,
            build_op=self._build_op,
            filter_shape=filter_shape,
            spatial_dims=spatial_dims,
            data_format=data_format,
        )

    def _build_op(self, _, padding):
        # Callback used by _WithSpaceToBatch to create the inner, non-atrous
        # convolution op once the effective padding is known.
        return nn_ops._NonAtrousConvolution(
            self.input_shape,
            filter_shape=self.filter_shape,
            padding=padding,
            data_format=self.data_format,
            strides=self.strides,
            name=self.name,
        )

    def __call__(self, inp, filter):
        """Apply the convolution op to `inp` with the given `filter` tensor."""
        return self.conv_op(inp, filter)
109
110
111
class Conv(Layer):
    """Abstract N-D convolution layer (private, used as implementation base).

    This layer creates a convolution kernel that is convolved
    (actually cross-correlated) with the layer input to produce a tensor of
    outputs. If `use_bias` is True (and a `bias_initializer` is provided),
    a bias vector is created and added to the outputs. Finally, if
    `activation` is not `None`, it is applied to the outputs as well.

    Note: layer attributes cannot be modified after the layer has been called
    once (except the `trainable` attribute).

    Arguments:
      rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
      filters: Integer, the dimensionality of the output space (i.e. the number
        of filters in the convolution).
      kernel_size: An integer or tuple/list of n integers, specifying the
        length of the convolution window.
      strides: An integer or tuple/list of n integers,
        specifying the stride length of the convolution.
        Specifying any stride value != 1 is incompatible with specifying
        any `dilation_rate` value != 1.
      padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive).
      data_format: A string, one of `channels_last` (default) or `channels_first`.
        The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape
        `(batch_size, ..., channels)` while `channels_first` corresponds to
        inputs with shape `(batch_size, channels, ...)`.
      dilation_rate: An integer or tuple/list of n integers, specifying
        the dilation rate to use for dilated convolution.
        Currently, specifying any `dilation_rate` value != 1 is
        incompatible with specifying any `strides` value != 1.
      groups: Integer, the number of channel groups controlling the connections
        between inputs and outputs. Input channels and `filters` must both be
        divisible by `groups`. For example,
          - At `groups=1`, all inputs are convolved to all outputs.
          - At `groups=2`, the operation becomes equivalent to having two
            convolutional layers side by side, each seeing half the input
            channels, and producing half the output channels, and both
            subsequently concatenated.
          - At `groups=input_channels`, each input channel is convolved with its
            own set of filters, of size `input_channels / filters`
      activation: Activation function to use.
        If you don't specify anything, no activation is applied.
      use_bias: Boolean, whether the layer uses a bias.
      kernel_initializer: An initializer for the convolution kernel.
      bias_initializer: An initializer for the bias vector. If None, the default
        initializer will be used.
      kernel_regularizer: Optional regularizer for the convolution kernel.
      bias_regularizer: Optional regularizer for the bias vector.
      activity_regularizer: Optional regularizer function for the output.
      kernel_constraint: Optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
      bias_constraint: Optional projection function to be applied to the
        bias after being updated by an `Optimizer`.
      trainable: Boolean, if `True` the weights of this layer will be marked as
        trainable (and listed in `layer.trainable_weights`).
      name: A string, the name of the layer.
    """

    def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs
    ):
        super(Conv, self).__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs
        )
        self.rank = rank
        # Coerce float-valued filter counts (e.g. from hyperparameter search)
        # to int before validation.
        if filters is not None and not isinstance(filters, int):
            filters = int(filters)
        self.filters = filters
        # `groups=None`/`0` falls back to an ordinary (ungrouped) convolution.
        self.groups = groups or 1
        if filters is not None and filters % self.groups != 0:
            raise ValueError(
                "The number of filters must be evenly divisible by the number of "
                "groups. Received: groups={}, filters={}".format(groups, filters)
            )
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank, "kernel_size")
        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0(s). "
                "Received: %s" % (kernel_size,)
            )
        self.strides = conv_utils.normalize_tuple(strides, rank, "strides")
        self.padding = conv_utils.normalize_padding(padding)
        if self.padding == "causal" and not isinstance(self, (Conv1D, SeparableConv1D)):
            # FIX: original message read "...`Conv1D`and ``SeparableConv1D`."
            # (missing space between fragments and a doubled backtick).
            raise ValueError(
                "Causal padding is only supported for `Conv1D` "
                "and `SeparableConv1D`."
            )
        self.data_format = conv_utils.normalize_data_format(data_format)
        self.dilation_rate = conv_utils.normalize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(ndim=self.rank + 2)

    def build(self, input_shape):
        """Create the kernel/bias variables and the convolution op."""
        input_shape = tensor_shape.TensorShape(input_shape)
        input_channel = self._get_input_channel(input_shape)
        if input_channel % self.groups != 0:
            raise ValueError(
                "The number of input channels must be evenly divisible by the number "
                "of groups. Received groups={}, but the input has {} channels "
                "(full input shape is {}).".format(
                    self.groups, input_channel, input_shape
                )
            )
        # Grouped conv: each filter only sees input_channel // groups channels.
        kernel_shape = self.kernel_size + (input_channel // self.groups, self.filters)

        self.kernel = self.add_weight(
            name="kernel",
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        channel_axis = self._get_channel_axis()
        # Pin the channel count so later calls with a different channel
        # dimension are rejected by the framework.
        self.input_spec = InputSpec(
            ndim=self.rank + 2, axes={channel_axis: input_channel}
        )

        # Cache build-time facts so call() can detect shape changes and
        # rebuild the op (see _recreate_conv_op).
        self._build_conv_op_input_shape = input_shape
        self._build_input_channel = input_channel
        self._padding_op = self._get_padding_op()
        self._conv_op_data_format = conv_utils.convert_data_format(
            self.data_format, self.rank + 2
        )
        self._convolution_op = Convolution(
            input_shape,
            filter_shape=self.kernel.shape,
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=self._padding_op,
            data_format=self._conv_op_data_format,
        )
        self.built = True

    def call(self, inputs):
        """Run the convolution, then bias add and activation if configured."""
        # The cached conv op is specialized to the build-time input shape;
        # recreate it when a later call uses a different static shape.
        if self._recreate_conv_op(inputs):
            self._convolution_op = Convolution(
                inputs.get_shape(),
                filter_shape=self.kernel.shape,
                dilation_rate=self.dilation_rate,
                strides=self.strides,
                padding=self._padding_op,
                data_format=self._conv_op_data_format,
            )
            self._build_conv_op_input_shape = inputs.get_shape()

        # Apply causal padding to inputs for Conv1D.
        if self.padding == "causal" and self.__class__.__name__ == "Conv1D":
            inputs = array_ops.pad(inputs, self._compute_causal_padding())

        outputs = self._convolution_op(inputs, self.kernel)

        if self.use_bias:
            if self.data_format == "channels_first":
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                else:
                    outputs = nn.bias_add(outputs, self.bias, data_format="NCHW")
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format="NHWC")

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Compute the static output shape for the given input shape."""
        input_shape = tensor_shape.TensorShape(input_shape).as_list()
        if self.data_format == "channels_last":
            space = input_shape[1:-1]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i],
                )
                new_space.append(new_dim)
            return tensor_shape.TensorShape(
                [input_shape[0]] + new_space + [self.filters]
            )
        else:
            space = input_shape[2:]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i],
                )
                new_space.append(new_dim)
            return tensor_shape.TensorShape([input_shape[0], self.filters] + new_space)

    def get_config(self):
        """Return the layer configuration for serialization.

        Note: `rank` is intentionally omitted; concrete subclasses fix it.
        """
        config = {
            "filters": self.filters,
            "kernel_size": self.kernel_size,
            "strides": self.strides,
            "padding": self.padding,
            "data_format": self.data_format,
            "dilation_rate": self.dilation_rate,
            "groups": self.groups,
            "activation": activations.serialize(self.activation),
            "use_bias": self.use_bias,
            "kernel_initializer": initializers.serialize(self.kernel_initializer),
            "bias_initializer": initializers.serialize(self.bias_initializer),
            "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
            "bias_regularizer": regularizers.serialize(self.bias_regularizer),
            "activity_regularizer": regularizers.serialize(self.activity_regularizer),
            "kernel_constraint": constraints.serialize(self.kernel_constraint),
            "bias_constraint": constraints.serialize(self.bias_constraint),
        }
        base_config = super(Conv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def _compute_causal_padding(self):
        """Calculates padding for 'causal' option for 1-d conv layers."""
        left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
        if self.data_format == "channels_last":
            causal_padding = [[0, 0], [left_pad, 0], [0, 0]]
        else:
            causal_padding = [[0, 0], [0, 0], [left_pad, 0]]
        return causal_padding

    def _get_channel_axis(self):
        """Return the axis index of the channel dimension."""
        if self.data_format == "channels_first":
            return 1
        else:
            return -1

    def _get_input_channel(self, input_shape):
        """Return the (static) channel count of `input_shape`; raise if unknown."""
        channel_axis = self._get_channel_axis()
        if input_shape.dims[channel_axis].value is None:
            raise ValueError(
                "The channel dimension of the inputs "
                "should be defined. Found `None`."
            )
        return int(input_shape[channel_axis])

    def _get_padding_op(self):
        """Translate the Keras padding mode into the form the conv op expects."""
        # Causal padding is applied manually in call(), so the op itself
        # runs with "valid" padding.
        if self.padding == "causal":
            op_padding = "valid"
        else:
            op_padding = self.padding
        if not isinstance(op_padding, (list, tuple)):
            op_padding = op_padding.upper()
        return op_padding

    def _recreate_conv_op(self, inputs):
        """Recreate conv_op if necessary.

        Check if the input_shape in call() is different from that in build().
        For the values that are not None, if they are different, recreate
        the _convolution_op to avoid the stateful behavior.

        Args:
          inputs: The input data to call() method.

        Returns:
          `True` or `False` to indicate whether to recreate the conv_op.
        """
        call_input_shape = inputs.get_shape()
        # Skip axis 0 (batch); only compare dims known on both sides.
        for axis in range(1, len(call_input_shape)):
            if (
                call_input_shape[axis] is not None
                and self._build_conv_op_input_shape[axis] is not None
                and call_input_shape[axis] != self._build_conv_op_input_shape[axis]
            ):
                return True
        return False
431
432
433
class GroupConv1D(Conv):
    """1D convolution layer (e.g. temporal convolution) with channel groups.

    Behaves like a standard `Conv1D`, plus a `groups` argument: input
    channels and `filters` must both be divisible by `groups`, and each
    group of input channels is convolved only with its own group of
    filters (the per-group outputs are concatenated along the channel
    axis). `groups=1` is an ordinary convolution; `groups=input_channels`
    is a depthwise-style convolution.

    Arguments:
      filters: Integer, the dimensionality of the output space.
      kernel_size: Integer or tuple/list of a single integer, the length of
        the 1D convolution window.
      strides: Integer or tuple/list of a single integer, the stride of the
        convolution. Any stride != 1 is incompatible with any
        `dilation_rate` != 1.
      padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive).
        `"causal"` yields dilated causal convolutions where `output[t]`
        does not depend on `input[t+1:]` (see WaveNet,
        https://arxiv.org/abs/1609.03499).
      data_format: `"channels_last"` (default) or `"channels_first"`.
      dilation_rate: Integer or tuple/list of a single integer, the dilation
        rate for dilated convolution.
      groups: Integer, the number of channel groups (see above).
      activation: Activation function; `None` means linear.
      use_bias: Boolean, whether a bias vector is added to the outputs.
      kernel_initializer / bias_initializer: Initializers for the kernel
        and bias (see `keras.initializers`).
      kernel_regularizer / bias_regularizer / activity_regularizer:
        Regularizers for kernel, bias, and layer output
        (see `keras.regularizers`).
      kernel_constraint / bias_constraint: Constraints applied to the
        kernel and bias (see `keras.constraints`).

    Input shape:
      3D tensor with shape `(batch_size, steps, input_dim)`.

    Output shape:
      3D tensor with shape `(batch_size, new_steps, filters)`; `steps` may
      change due to padding or strides.

    Returns:
      A rank-3 tensor `activation(conv1d(inputs, kernel) + bias)`.

    Raises:
      ValueError: when both `strides` > 1 and `dilation_rate` > 1.
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format="channels_last",
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        # Resolve string identifiers / serialized configs into concrete
        # objects up front, then delegate everything to the base class with
        # the rank pinned to 1.
        base_kwargs = dict(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activations.get(activation),
            use_bias=use_bias,
            kernel_initializer=initializers.get(kernel_initializer),
            bias_initializer=initializers.get(bias_initializer),
            kernel_regularizer=regularizers.get(kernel_regularizer),
            bias_regularizer=regularizers.get(bias_regularizer),
            activity_regularizer=regularizers.get(activity_regularizer),
            kernel_constraint=constraints.get(kernel_constraint),
            bias_constraint=constraints.get(bias_constraint),
        )
        base_kwargs.update(kwargs)
        super(GroupConv1D, self).__init__(**base_kwargs)
557
558