# Path: blob/master/tensorflow_tts/utils/group_conv.py
# (source mirror metadata: 1558 views)
# -*- coding: utf-8 -*-
# This code is copied from https://github.com/tensorflow/tensorflow/pull/36773.
"""Group Convolution Modules."""

from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import activations, constraints, initializers, regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.layers import Conv1D, SeparableConv1D
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.ops import array_ops, nn, nn_ops


class Convolution(object):
    """Helper class for convolution.

    Note that this class assumes that shapes of input and filter passed to
    __call__ are compatible with input_shape and filter_shape passed to the
    constructor.

    Arguments
      input_shape: static shape of input. i.e. input.get_shape().
      filter_shape: static shape of the filter. i.e. filter.get_shape().
      padding: see convolution.
      strides: see convolution.
      dilation_rate: see convolution.
      name: see convolution.
      data_format: see convolution.
    """

    def __init__(
        self,
        input_shape,
        filter_shape,
        padding,
        strides=None,
        dilation_rate=None,
        name=None,
        data_format=None,
    ):
        """Helper function for convolution."""
        num_total_dims = filter_shape.ndims
        if num_total_dims is None:
            num_total_dims = input_shape.ndims
        if num_total_dims is None:
            raise ValueError("rank of input or filter must be known")

        num_spatial_dims = num_total_dims - 2

        try:
            input_shape.with_rank(num_spatial_dims + 2)
        except ValueError:
            raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))

        try:
            filter_shape.with_rank(num_spatial_dims + 2)
        except ValueError:
            raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))

        if data_format is None or not data_format.startswith("NC"):
            # channels_last: channels are the trailing dimension.
            input_channels_dim = tensor_shape.dimension_at_index(
                input_shape, num_spatial_dims + 1
            )
            spatial_dims = range(1, num_spatial_dims + 1)
        else:
            # channels_first ("NC..."): channels directly follow the batch dim.
            input_channels_dim = tensor_shape.dimension_at_index(input_shape, 1)
            spatial_dims = range(2, num_spatial_dims + 2)

        filter_dim = tensor_shape.dimension_at_index(filter_shape, num_spatial_dims)
        # Requiring divisibility (rather than equality) of input channels by the
        # filter's in-channel dimension is what enables grouped convolution.
        if not (input_channels_dim % filter_dim).is_compatible_with(0):
            raise ValueError(
                "number of input channels is not divisible by corresponding "
                "dimension of filter, {} % {} != 0".format(
                    input_channels_dim, filter_dim
                )
            )

        strides, dilation_rate = nn_ops._get_strides_and_dilation_rate(
            num_spatial_dims, strides, dilation_rate
        )

        self.input_shape = input_shape
        self.filter_shape = filter_shape
        self.data_format = data_format
        self.strides = strides
        self.padding = padding
        self.name = name
        self.dilation_rate = dilation_rate
        # _WithSpaceToBatch implements dilation via space-to-batch around the
        # non-atrous convolution built by self._build_op.
        self.conv_op = nn_ops._WithSpaceToBatch(
            input_shape,
            dilation_rate=dilation_rate,
            padding=padding,
            build_op=self._build_op,
            filter_shape=filter_shape,
            spatial_dims=spatial_dims,
            data_format=data_format,
        )

    def _build_op(self, _, padding):
        # Callback for _WithSpaceToBatch; builds the inner non-dilated conv op.
        return nn_ops._NonAtrousConvolution(
            self.input_shape,
            filter_shape=self.filter_shape,
            padding=padding,
            data_format=self.data_format,
            strides=self.strides,
            name=self.name,
        )

    def __call__(self, inp, filter):
        return self.conv_op(inp, filter)


class Conv(Layer):
    """Abstract N-D convolution layer (private, used as implementation base).

    This layer creates a convolution kernel that is convolved
    (actually cross-correlated) with the layer input to produce a tensor of
    outputs. If `use_bias` is True (and a `bias_initializer` is provided),
    a bias vector is created and added to the outputs. Finally, if
    `activation` is not `None`, it is applied to the outputs as well.

    Note: layer attributes cannot be modified after the layer has been called
    once (except the `trainable` attribute).

    Arguments:
      rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
      filters: Integer, the dimensionality of the output space (i.e. the number
        of filters in the convolution).
      kernel_size: An integer or tuple/list of n integers, specifying the
        length of the convolution window.
      strides: An integer or tuple/list of n integers,
        specifying the stride length of the convolution.
        Specifying any stride value != 1 is incompatible with specifying
        any `dilation_rate` value != 1.
      padding: One of `"valid"`, `"same"`, or `"causal"` (case-insensitive).
      data_format: A string, one of `channels_last` (default) or `channels_first`.
        The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape
        `(batch_size, ..., channels)` while `channels_first` corresponds to
        inputs with shape `(batch_size, channels, ...)`.
      dilation_rate: An integer or tuple/list of n integers, specifying
        the dilation rate to use for dilated convolution.
        Currently, specifying any `dilation_rate` value != 1 is
        incompatible with specifying any `strides` value != 1.
      groups: Integer, the number of channel groups controlling the connections
        between inputs and outputs. Input channels and `filters` must both be
        divisible by `groups`. For example,
          - At `groups=1`, all inputs are convolved to all outputs.
          - At `groups=2`, the operation becomes equivalent to having two
            convolutional layers side by side, each seeing half the input
            channels, and producing half the output channels, and both
            subsequently concatenated.
          - At `groups=input_channels`, each input channel is convolved with its
            own set of filters, of size `input_channels / filters`
      activation: Activation function to use.
        If you don't specify anything, no activation is applied.
      use_bias: Boolean, whether the layer uses a bias.
      kernel_initializer: An initializer for the convolution kernel.
      bias_initializer: An initializer for the bias vector. If None, the default
        initializer will be used.
      kernel_regularizer: Optional regularizer for the convolution kernel.
      bias_regularizer: Optional regularizer for the bias vector.
      activity_regularizer: Optional regularizer function for the output.
      kernel_constraint: Optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
      bias_constraint: Optional projection function to be applied to the
        bias after being updated by an `Optimizer`.
      trainable: Boolean, if `True` the weights of this layer will be marked as
        trainable (and listed in `layer.trainable_weights`).
      name: A string, the name of the layer.
    """

    def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs
    ):
        super(Conv, self).__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs
        )
        self.rank = rank
        if filters is not None and not isinstance(filters, int):
            filters = int(filters)
        self.filters = filters
        self.groups = groups or 1
        if filters is not None and filters % self.groups != 0:
            raise ValueError(
                "The number of filters must be evenly divisible by the number of "
                "groups. Received: groups={}, filters={}".format(groups, filters)
            )
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank, "kernel_size")
        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0(s). "
                "Received: %s" % (kernel_size,)
            )
        self.strides = conv_utils.normalize_tuple(strides, rank, "strides")
        self.padding = conv_utils.normalize_padding(padding)
        # NOTE(review): this isinstance check is against the stock Keras Conv1D /
        # SeparableConv1D classes, so GroupConv1D below (which subclasses this
        # Conv, not Conv1D) rejects padding="causal" here.
        if self.padding == "causal" and not isinstance(self, (Conv1D, SeparableConv1D)):
            raise ValueError(
                "Causal padding is only supported for `Conv1D` "
                "and `SeparableConv1D`."
            )
        self.data_format = conv_utils.normalize_data_format(data_format)
        self.dilation_rate = conv_utils.normalize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(ndim=self.rank + 2)

    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        input_channel = self._get_input_channel(input_shape)
        if input_channel % self.groups != 0:
            raise ValueError(
                "The number of input channels must be evenly divisible by the number "
                "of groups. Received groups={}, but the input has {} channels "
                "(full input shape is {}).".format(
                    self.groups, input_channel, input_shape
                )
            )
        # Each group convolves input_channel // groups channels; the kernel's
        # in-channel dim being smaller than input_channel triggers grouping in
        # the Convolution helper above.
        kernel_shape = self.kernel_size + (input_channel // self.groups, self.filters)

        self.kernel = self.add_weight(
            name="kernel",
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        channel_axis = self._get_channel_axis()
        self.input_spec = InputSpec(
            ndim=self.rank + 2, axes={channel_axis: input_channel}
        )

        self._build_conv_op_input_shape = input_shape
        self._build_input_channel = input_channel
        self._padding_op = self._get_padding_op()
        self._conv_op_data_format = conv_utils.convert_data_format(
            self.data_format, self.rank + 2
        )
        self._convolution_op = Convolution(
            input_shape,
            filter_shape=self.kernel.shape,
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=self._padding_op,
            data_format=self._conv_op_data_format,
        )
        self.built = True

    def call(self, inputs):
        # Rebuild the conv op if the runtime input shape diverged from the one
        # seen at build() time (avoids stateful shape capture).
        if self._recreate_conv_op(inputs):
            self._convolution_op = Convolution(
                inputs.get_shape(),
                filter_shape=self.kernel.shape,
                dilation_rate=self.dilation_rate,
                strides=self.strides,
                padding=self._padding_op,
                data_format=self._conv_op_data_format,
            )
            self._build_conv_op_input_shape = inputs.get_shape()

        # Apply causal padding to inputs for Conv1D.
        if self.padding == "causal" and self.__class__.__name__ == "Conv1D":
            inputs = array_ops.pad(inputs, self._compute_causal_padding())

        outputs = self._convolution_op(inputs, self.kernel)

        if self.use_bias:
            if self.data_format == "channels_first":
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                else:
                    outputs = nn.bias_add(outputs, self.bias, data_format="NCHW")
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format="NHWC")

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape).as_list()
        if self.data_format == "channels_last":
            space = input_shape[1:-1]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i],
                )
                new_space.append(new_dim)
            return tensor_shape.TensorShape(
                [input_shape[0]] + new_space + [self.filters]
            )
        else:
            space = input_shape[2:]
            new_space = []
            for i in range(len(space)):
                new_dim = conv_utils.conv_output_length(
                    space[i],
                    self.kernel_size[i],
                    padding=self.padding,
                    stride=self.strides[i],
                    dilation=self.dilation_rate[i],
                )
                new_space.append(new_dim)
            return tensor_shape.TensorShape([input_shape[0], self.filters] + new_space)

    def get_config(self):
        config = {
            "filters": self.filters,
            "kernel_size": self.kernel_size,
            "strides": self.strides,
            "padding": self.padding,
            "data_format": self.data_format,
            "dilation_rate": self.dilation_rate,
            "groups": self.groups,
            "activation": activations.serialize(self.activation),
            "use_bias": self.use_bias,
            "kernel_initializer": initializers.serialize(self.kernel_initializer),
            "bias_initializer": initializers.serialize(self.bias_initializer),
            "kernel_regularizer": regularizers.serialize(self.kernel_regularizer),
            "bias_regularizer": regularizers.serialize(self.bias_regularizer),
            "activity_regularizer": regularizers.serialize(self.activity_regularizer),
            "kernel_constraint": constraints.serialize(self.kernel_constraint),
            "bias_constraint": constraints.serialize(self.bias_constraint),
        }
        base_config = super(Conv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def _compute_causal_padding(self):
        """Calculates padding for 'causal' option for 1-d conv layers."""
        left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
        if self.data_format == "channels_last":
            causal_padding = [[0, 0], [left_pad, 0], [0, 0]]
        else:
            causal_padding = [[0, 0], [0, 0], [left_pad, 0]]
        return causal_padding

    def _get_channel_axis(self):
        if self.data_format == "channels_first":
            return 1
        else:
            return -1

    def _get_input_channel(self, input_shape):
        channel_axis = self._get_channel_axis()
        if input_shape.dims[channel_axis].value is None:
            raise ValueError(
                "The channel dimension of the inputs "
                "should be defined. Found `None`."
            )
        return int(input_shape[channel_axis])

    def _get_padding_op(self):
        # "causal" is implemented as explicit left-padding + a VALID conv.
        if self.padding == "causal":
            op_padding = "valid"
        else:
            op_padding = self.padding
        if not isinstance(op_padding, (list, tuple)):
            op_padding = op_padding.upper()
        return op_padding

    def _recreate_conv_op(self, inputs):
        """Recreate conv_op if necessary.

        Check if the input_shape in call() is different from that in build().
        For the values that are not None, if they are different, recreate
        the _convolution_op to avoid the stateful behavior.

        Args:
          inputs: The input data to call() method.

        Returns:
          `True` or `False` to indicate whether to recreate the conv_op.
        """
        call_input_shape = inputs.get_shape()
        # Skip axis 0 (batch); only known (non-None) dims are compared.
        for axis in range(1, len(call_input_shape)):
            if (
                call_input_shape[axis] is not None
                and self._build_conv_op_input_shape[axis] is not None
                and call_input_shape[axis] != self._build_conv_op_input_shape[axis]
            ):
                return True
        return False


class GroupConv1D(Conv):
    """1D convolution layer (e.g. temporal convolution).

    This layer creates a convolution kernel that is convolved
    with the layer input over a single spatial (or temporal) dimension
    to produce a tensor of outputs.
    If `use_bias` is True, a bias vector is created and added to the outputs.
    Finally, if `activation` is not `None`,
    it is applied to the outputs as well.

    When using this layer as the first layer in a model,
    provide an `input_shape` argument
    (tuple of integers or `None`, e.g.
    `(10, 128)` for sequences of 10 vectors of 128-dimensional vectors,
    or `(None, 128)` for variable-length sequences of 128-dimensional vectors.

    Examples:
    >>> # The inputs are 128-length vectors with 10 timesteps, and the batch size
    >>> # is 4.
    >>> input_shape = (4, 10, 128)
    >>> x = tf.random.normal(input_shape)
    >>> y = tf.keras.layers.Conv1D(
    ... 32, 3, activation='relu',input_shape=input_shape)(x)
    >>> print(y.shape)
    (4, 8, 32)

    Arguments:
      filters: Integer, the dimensionality of the output space
        (i.e. the number of output filters in the convolution).
      kernel_size: An integer or tuple/list of a single integer,
        specifying the length of the 1D convolution window.
      strides: An integer or tuple/list of a single integer,
        specifying the stride length of the convolution.
        Specifying any stride value != 1 is incompatible with specifying
        any `dilation_rate` value != 1.
      padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive).
        `"causal"` results in causal (dilated) convolutions, e.g. `output[t]`
        does not depend on `input[t+1:]`. Useful when modeling temporal data
        where the model should not violate the temporal order.
        See [WaveNet: A Generative Model for Raw Audio, section
        2.1](https://arxiv.org/abs/1609.03499).
      data_format: A string,
        one of `channels_last` (default) or `channels_first`.
      groups: Integer, the number of channel groups controlling the connections
        between inputs and outputs. Input channels and `filters` must both be
        divisible by `groups`. For example,
          - At `groups=1`, all inputs are convolved to all outputs.
          - At `groups=2`, the operation becomes equivalent to having two
            convolutional layers side by side, each seeing half the input
            channels, and producing half the output channels, and both
            subsequently concatenated.
          - At `groups=input_channels`, each input channel is convolved with its
            own set of filters, of size `input_channels / filters`
      dilation_rate: an integer or tuple/list of a single integer, specifying
        the dilation rate to use for dilated convolution.
        Currently, specifying any `dilation_rate` value != 1 is
        incompatible with specifying any `strides` value != 1.
      activation: Activation function to use.
        If you don't specify anything, no activation is applied (
        see `keras.activations`).
      use_bias: Boolean, whether the layer uses a bias vector.
      kernel_initializer: Initializer for the `kernel` weights matrix (
        see `keras.initializers`).
      bias_initializer: Initializer for the bias vector (
        see `keras.initializers`).
      kernel_regularizer: Regularizer function applied to
        the `kernel` weights matrix (see `keras.regularizers`).
      bias_regularizer: Regularizer function applied to the bias vector (
        see `keras.regularizers`).
      activity_regularizer: Regularizer function applied to
        the output of the layer (its "activation") (
        see `keras.regularizers`).
      kernel_constraint: Constraint function applied to the kernel matrix (
        see `keras.constraints`).
      bias_constraint: Constraint function applied to the bias vector (
        see `keras.constraints`).

    Input shape:
      3D tensor with shape: `(batch_size, steps, input_dim)`

    Output shape:
      3D tensor with shape: `(batch_size, new_steps, filters)`
      `steps` value might have changed due to padding or strides.

    Returns:
      A tensor of rank 3 representing
      `activation(conv1d(inputs, kernel) + bias)`.

    Raises:
      ValueError: when both `strides` > 1 and `dilation_rate` > 1.
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format="channels_last",
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super().__init__(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activations.get(activation),
            use_bias=use_bias,
            kernel_initializer=initializers.get(kernel_initializer),
            bias_initializer=initializers.get(bias_initializer),
            kernel_regularizer=regularizers.get(kernel_regularizer),
            bias_regularizer=regularizers.get(bias_regularizer),
            activity_regularizer=regularizers.get(activity_regularizer),
            kernel_constraint=constraints.get(kernel_constraint),
            bias_constraint=constraints.get(bias_constraint),
            **kwargs
        )