Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
lucidrains
GitHub Repository: lucidrains/vit-pytorch
Path: blob/main/vit_pytorch/efficient.py
649 views
1
import torch
2
from torch import nn
3
from einops import rearrange, repeat
4
from einops.layers.torch import Rearrange
5
6
def pair(t):
    """Coerce a value into a 2-tuple: tuples pass through unchanged, any
    other value is duplicated into (value, value)."""
    if isinstance(t, tuple):
        return t
    return (t, t)
class ViT(nn.Module):
    """Vision Transformer front-end that delegates sequence modelling to a
    caller-supplied `transformer` module.

    Images are split into non-overlapping square patches, each patch is
    flattened and linearly projected to `dim`, a learnable CLS token is
    prepended and positional embeddings are added, and the token sequence
    is run through `transformer`. The result is pooled (CLS token or mean
    over tokens) and mapped to `num_classes` logits.
    """

    def __init__(self, *, image_size, patch_size, num_classes, dim, transformer, pool = 'cls', channels = 3):
        """
        Args:
            image_size: int or (height, width) of the input images.
            patch_size: side length of each square patch; must evenly divide
                both image dimensions.
            num_classes: output size of the classification head.
            dim: token embedding dimension.
            transformer: module mapping (b, n, dim) -> (b, n, dim).
            pool: 'cls' to classify from the CLS token, 'mean' for mean pooling.
            channels: number of input image channels.
        """
        super().__init__()
        height, width = pair(image_size)
        assert height % patch_size == 0 and width % patch_size == 0, 'image dimensions must be divisible by the patch size'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        patches_per_image = (height // patch_size) * (width // patch_size)
        patch_dim = channels * patch_size ** 2

        # flatten each patch, normalize, and project into the embedding space
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
            nn.LayerNorm(patch_dim),
            nn.Linear(patch_dim, dim),
            nn.LayerNorm(dim)
        )

        # one extra position for the prepended CLS token
        self.pos_embedding = nn.Parameter(torch.randn(1, patches_per_image + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.transformer = transformer

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, img):
        """Map a batch of images (b, channels, h, w) to logits (b, num_classes)."""
        tokens = self.to_patch_embedding(img)
        batch, num_patches, _ = tokens.shape

        # prepend one CLS token per batch element, then add positional embeddings
        cls = repeat(self.cls_token, '() n d -> b n d', b = batch)
        tokens = torch.cat((cls, tokens), dim = 1)
        tokens = tokens + self.pos_embedding[:, :(num_patches + 1)]

        encoded = self.transformer(tokens)

        if self.pool == 'mean':
            pooled = encoded.mean(dim = 1)
        else:
            pooled = encoded[:, 0]

        return self.mlp_head(self.to_latent(pooled))