CoCalc -- qrnn.py

GitHub Repository: jantic/deoldify
Path: blob/master/fastai/text/models/qrnn.py
⁸⁴¹ views
1
from ...torch_core import *
2
from torch.utils.cpp_extension import load
3
from torch.autograd import Function
4

5
__all__ = ['QRNNLayer', 'QRNN']
6

7
import fastai
8
# JIT-compile the custom CUDA extensions used by the GPU forget-mult autograd functions.
# This only runs when a CUDA device is visible; otherwise `forget_mult_cuda` and
# `bwd_forget_mult_cuda` are never defined at module level.
if torch.cuda.is_available():
    # The .cpp/.cu sources are read from the installed fastai package's text/models directory.
    fastai_path = Path(fastai.__path__[0])/'text'/'models'
    # Extension used for the forward-in-time recurrence.
    files = ['forget_mult_cuda.cpp', 'forget_mult_cuda_kernel.cu']
    forget_mult_cuda = load(name='forget_mult_cuda', sources=[fastai_path/f for f in files])
    # Extension used for the reversed-in-time recurrence.
    files = ['bwd_forget_mult_cuda.cpp', 'bwd_forget_mult_cuda_kernel.cu']
    bwd_forget_mult_cuda = load(name='bwd_forget_mult_cuda', sources=[fastai_path/f for f in files])
14

15
def dispatch_cuda(cuda_class, cpu_func, x):
    "Return `cuda_class.apply` if `x` is on a CUDA device, otherwise return `cpu_func`."
    # `cuda_class.apply` is only looked up on the CUDA path, as in a conditional expression.
    if x.device.type == 'cuda': return cuda_class.apply
    return cpu_func
17
    
18
class ForgetMultGPU(Function):
    "Autograd `Function` that runs the forward-in-time forget-mult through the `forget_mult_cuda` extension."

    @staticmethod
    def forward(ctx, x:Tensor, f:Tensor, hidden_init:Optional[Tensor]=None, batch_first:bool=True):
        """Run the CUDA forward kernel and return one state per time step.

        `f` must be 3-D: (batch, seq, hidden) if `batch_first`, else (seq, batch, hidden).
        A work buffer with one extra time slot is allocated; slot 0 holds `hidden_init`
        (or zeros when it is `None`) and is dropped from the returned tensor.
        NOTE(review): the recurrence itself lives in the CUDA kernel, which is not visible
        here; presumably it matches `forget_mult_CPU` -- confirm against the kernel source.
        """
        # The buffer is zero-initialised for both layouts, so no element is ever read
        # uninitialised, whether or not an initial hidden state is supplied.
        if batch_first:
            batch_size, seq_size, hidden_size = f.size()
            output = f.new_zeros(batch_size, seq_size + 1, hidden_size)
            if hidden_init is not None: output[:, 0] = hidden_init
        else:
            seq_size, batch_size, hidden_size = f.size()
            output = f.new_zeros(seq_size + 1, batch_size, hidden_size)
            if hidden_init is not None: output[0] = hidden_init
        output = forget_mult_cuda.forward(x, f, output, batch_first)
        # The full buffer (including the initial slot) is what the backward kernel receives.
        ctx.save_for_backward(x, f, hidden_init, output)
        ctx.batch_first = batch_first
        return output[:,1:] if batch_first else output[1:]

    @staticmethod
    def backward(ctx, grad_output):
        "Return gradients for `(x, f, hidden_init, batch_first)`, in the order `forward` takes them."
        x, f, hidden_init, output = ctx.saved_tensors
        grad_x, grad_f, grad_h = forget_mult_cuda.backward(x, f, output, grad_output, ctx.batch_first)
        # No gradient for `hidden_init` when none was supplied; `batch_first` is a plain flag.
        return (grad_x, grad_f, (None if hidden_init is None else grad_h), None)
42
    
43
class BwdForgetMultGPU(Function):
    "Autograd `Function` that runs the reversed-in-time forget-mult through the `bwd_forget_mult_cuda` extension."

    @staticmethod
    def forward(ctx, x:Tensor, f:Tensor, hidden_init:Optional[Tensor]=None, batch_first:bool=True):
        """Run the CUDA forward kernel for the reversed direction and return one state per time step.

        `f` must be 3-D: (batch, seq, hidden) if `batch_first`, else (seq, batch, hidden).
        A work buffer with one extra time slot is allocated; the LAST slot holds `hidden_init`
        (or zeros when it is `None`) and is dropped from the returned tensor.
        NOTE(review): the recurrence itself lives in the CUDA kernel, which is not visible
        here; presumably it matches `forget_mult_CPU(..., backward=True)` -- confirm.
        """
        # The buffer is zero-initialised for both layouts, so no element is ever read
        # uninitialised, whether or not an initial hidden state is supplied.
        if batch_first:
            batch_size, seq_size, hidden_size = f.size()
            output = f.new_zeros(batch_size, seq_size + 1, hidden_size)
            if hidden_init is not None: output[:, -1] = hidden_init
        else:
            seq_size, batch_size, hidden_size = f.size()
            output = f.new_zeros(seq_size + 1, batch_size, hidden_size)
            if hidden_init is not None: output[-1] = hidden_init
        output = bwd_forget_mult_cuda.forward(x, f, output, batch_first)
        # The full buffer (including the initial slot) is what the backward kernel receives.
        ctx.save_for_backward(x, f, hidden_init, output)
        ctx.batch_first = batch_first
        return output[:,:-1] if batch_first else output[:-1]

    @staticmethod
    def backward(ctx, grad_output:Tensor):
        "Return gradients for `(x, f, hidden_init, batch_first)`, in the order `forward` takes them."
        x, f, hidden_init, output = ctx.saved_tensors
        grad_x, grad_f, grad_h = bwd_forget_mult_cuda.backward(x, f, output, grad_output, ctx.batch_first)
        # No gradient for `hidden_init` when none was supplied; `batch_first` is a plain flag.
        return (grad_x, grad_f, (None if hidden_init is None else grad_h), None)
67
    
68
def forget_mult_CPU(x:Tensor, f:Tensor, hidden_init:Optional[Tensor]=None, batch_first:bool=True, backward:bool=False):
    """Pure-PyTorch forget-mult: `h_t = x_t * f_t + (1 - f_t) * h_prev`, one step at a time.

    `x` and `f` are split into single-step slices along the time dimension (dim 1 if
    `batch_first`, else dim 0). When `hidden_init` is `None` the first processed step is
    just `x_t * f_t`. With `backward=True` the steps are processed from last to first, and
    the result is still returned in the original time order.
    Returns the per-step states concatenated along the time dimension.
    """
    dim = (1 if batch_first else 0)
    steps = list(zip(x.split(1, dim=dim), f.split(1, dim=dim)))
    if backward: steps.reverse()
    # Give the initial state a singleton time dimension so it broadcasts against one step.
    prev_h = None if hidden_init is None else hidden_init.unsqueeze(dim)
    result = []
    for x_t, f_t in steps:
        prev_h = x_t * f_t if prev_h is None else x_t * f_t + (1-f_t) * prev_h
        result.append(prev_h)
    # One O(n) reversal restores time order, instead of an O(n) `insert(0, ...)` per step.
    if backward: result.reverse()
    return torch.cat(result, dim=dim)
80

81
class QRNNLayer(Module):
    "Apply a single layer Quasi-Recurrent Neural Network (QRNN) to an input sequence."

    def __init__(self, input_size:int, hidden_size:Optional[int]=None, save_prev_x:bool=False, zoneout:float=0, window:int=1,
                 output_gate:bool=True, batch_first:bool=True, backward:bool=False):
        """Build the layer.

        `hidden_size` defaults to `input_size`. `window` (1 or 2) is how many time steps feed
        each gate computation. `save_prev_x` keeps the boundary input step between calls
        (only used when `window` is 2). `zoneout` is the probability passed to `dropout_mask`
        for the forget gate in training. `output_gate` adds a third gate applied to the
        output. `backward` runs the recurrence from the last step to the first.
        """
        super().__init__()
        assert window in [1, 2], "This QRNN implementation currently only handles convolutional window of size 1 or size 2"
        self.save_prev_x,self.zoneout,self.window = save_prev_x,zoneout,window
        self.output_gate,self.batch_first,self.backward = output_gate,batch_first,backward
        hidden_size = ifnone(hidden_size, input_size)
        #One large matmul with concat is faster than N small matmuls and no concat
        mult = (3 if output_gate else 2)
        self.linear = nn.Linear(window * input_size, mult * hidden_size)
        self.prevX = None

    def reset(self):
        "Forget the saved previous input; call this when starting with a new state if `save_prev_x` is set."
        self.prevX = None

    def forward(self, inp, hid=None):
        """Run the layer on `inp` (3-D, features on dim 2) with optional initial state `hid`.

        Returns `(output, last_state)`: `output` has one entry per time step, and
        `last_state` is the state at the final processed step (index 0 when running
        backward, index -1 otherwise).
        """
        y = self.linear(self._get_source(inp))
        if self.output_gate: z_gate,f_gate,o_gate = y.chunk(3, dim=2)
        else:                z_gate,f_gate        = y.chunk(2, dim=2)
        # Use out-of-place activations: `chunk` returns several views of `y`, and autograd
        # does not allow in-place modification of views produced by multi-output functions.
        z_gate,f_gate = z_gate.tanh(),f_gate.sigmoid()
        if self.zoneout and self.training:
            mask = dropout_mask(f_gate, f_gate.size(), self.zoneout).requires_grad_(False)
            f_gate = f_gate * mask
        z_gate,f_gate = z_gate.contiguous(),f_gate.contiguous()
        # Pick the CUDA autograd Function or the pure-PyTorch loop based on the input's device.
        if self.backward: forget_mult = dispatch_cuda(BwdForgetMultGPU, partial(forget_mult_CPU, backward=True), inp)
        else:             forget_mult = dispatch_cuda(ForgetMultGPU, forget_mult_CPU, inp)
        c_gate = forget_mult(z_gate, f_gate, hid, self.batch_first)
        output = torch.sigmoid(o_gate) * c_gate if self.output_gate else c_gate
        if self.window > 1 and self.save_prev_x:
            # Keep the boundary step (first when running backward, last otherwise), detached
            # from the graph, for `_get_source` to use on the next call.
            if self.backward: self.prevX = (inp[:, :1] if self.batch_first else inp[:1]).detach()
            else:             self.prevX = (inp[:, -1:] if self.batch_first else inp[-1:]).detach()
        idx = 0 if self.backward else -1
        return output, (c_gate[:, idx] if self.batch_first else c_gate[idx])

    def _get_source(self, inp):
        "Return `inp`, concatenated on dim 2 with a one-step-shifted copy of itself when `window` is 2."
        if self.window == 1: return inp
        dim = (1 if self.batch_first else 0)
        # The step shifted in at the boundary is the saved `prevX`, or zeros when none is saved.
        inp_shift = [torch.zeros_like(inp[:,:1] if self.batch_first else inp[:1]) if self.prevX is None else self.prevX]
        if self.backward: inp_shift.insert(0,inp[:,1:] if self.batch_first else inp[1:])
        else:             inp_shift.append(inp[:,:-1] if self.batch_first else inp[:-1])
        inp_shift = torch.cat(inp_shift, dim)
        return torch.cat([inp, inp_shift], 2)
128
    
129
class QRNN(Module):
    "Apply a multiple layer Quasi-Recurrent Neural Network (QRNN) to an input sequence."

    def __init__(self, input_size:int, hidden_size:int, n_layers:int=1, bias:bool=True, batch_first:bool=True,
                 dropout:float=0, bidirectional:bool=False, save_prev_x:bool=False, zoneout:float=0, window:int=None,
                 output_gate:bool=True):
        """Stack `n_layers` `QRNNLayer`s, plus a reversed-direction stack when `bidirectional`.

        When `window` is `None`, the first layer uses a window of 2 and the others 1.
        `dropout` is applied between layers, not after the last one. `save_prev_x` cannot be
        combined with `bidirectional`, and `bias` must be `True`.
        """
        assert not (save_prev_x and bidirectional), "Can't save the previous X with bidirectional."
        assert bias == True, 'Removing underlying bias is not yet supported'
        super().__init__()
        kwargs = dict(batch_first=batch_first, zoneout=zoneout, output_gate=output_gate)
        self.layers = nn.ModuleList([QRNNLayer(input_size if l == 0 else hidden_size, hidden_size, save_prev_x=save_prev_x,
                                               window=((2 if l ==0 else 1) if window is None else window), **kwargs)
                                     for l in range(n_layers)])
        if bidirectional:
            self.layers_bwd = nn.ModuleList([QRNNLayer(input_size if l == 0 else hidden_size, hidden_size,
                                                       backward=True, window=((2 if l ==0 else 1) if window is None else window),
                                                       **kwargs) for l in range(n_layers)])
        self.n_layers,self.batch_first,self.dropout,self.bidirectional = n_layers,batch_first,dropout,bidirectional

    def reset(self):
        "If your convolutional window is greater than 1 and you save previous xs, you must reset at the beginning of each new sequence."
        for layer in self.layers:     layer.reset()
        if self.bidirectional:
            for layer in self.layers_bwd: layer.reset()

    def forward(self, inp, hid=None):
        """Run every layer on `inp` and return `(output, new_hidden)`.

        `hid`, when given, is indexed per layer: `hid[i]` for a unidirectional model, or
        `hid[2*i]` (forward) and `hid[2*i+1]` (backward) for a bidirectional one.
        `new_hidden` stacks the final states in that same order. For a bidirectional model
        the two directions' outputs are concatenated on dim 2.
        """
        new_hid = []
        if self.bidirectional: inp_bwd = inp.clone()
        for i, layer in enumerate(self.layers):
            inp, h = layer(inp, None if hid is None else hid[2*i if self.bidirectional else i])
            new_hid.append(h)
            if self.bidirectional:
                inp_bwd, h_bwd = self.layers_bwd[i](inp_bwd, None if hid is None else hid[2*i+1])
                new_hid.append(h_bwd)
            if self.dropout != 0 and i < len(self.layers) - 1:
                # Assign the result back: rebinding a loop variable would leave `inp` and
                # `inp_bwd` untouched, so dropout would never reach the next layer.
                inp = F.dropout(inp, p=self.dropout, training=self.training, inplace=False)
                if self.bidirectional:
                    inp_bwd = F.dropout(inp_bwd, p=self.dropout, training=self.training, inplace=False)
        if self.bidirectional: inp = torch.cat([inp, inp_bwd], dim=2)
        return inp, torch.stack(new_hid, 0)
168
Product

Resources

Company