GitHub Repository: automatic1111/stable-diffusion-webui
Path: blob/master/modules/lowvram.py
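
This module implements the webui's --lowvram and --medvram memory optimizations: the large components of a Stable Diffusion checkpoint are parked on the CPU, and forward pre-hooks shuttle each component to the GPU only while it is actually running, so at most one of them occupies VRAM at a time.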
from collections import namedtuple

import torch
from modules import devices, shared

module_in_gpu = None
cpu = torch.device("cpu")

ModuleWithParent = namedtuple('ModuleWithParent', ['module', 'parent'], defaults=[None])
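# medvram_modules() may yield either plain modules or ModuleWithParent entries;
# `parent` names the larger module that should be moved between devices whenever
# the hooked submodule runs (see the `parents` mapping in setup_for_low_vram)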


def send_everything_to_cpu():
    global module_in_gpu

    if module_in_gpu is not None:
        module_in_gpu.to(cpu)

    module_in_gpu = None


def is_needed(sd_model):
    # --medvram-sdxl only applies when the loaded checkpoint is SDXL (i.e. has a conditioner)
    return shared.cmd_opts.lowvram or shared.cmd_opts.medvram or (shared.cmd_opts.medvram_sdxl and hasattr(sd_model, 'conditioner'))


def apply(sd_model):
    enable = is_needed(sd_model)
    shared.parallel_processing_allowed = not enable

    if enable:
        setup_for_low_vram(sd_model, not shared.cmd_opts.lowvram)
    else:
        sd_model.lowvram = False


def setup_for_low_vram(sd_model, use_medvram):
    if getattr(sd_model, 'lowvram', False):
        return

    sd_model.lowvram = True

    parents = {}

    def send_me_to_gpu(module, _):
        """send this module to GPU; send whatever tracked module was previously on the GPU to CPU;
        we add this as a forward_pre_hook to a lot of modules, and this way all but one of them will
        be on the CPU
        """
        global module_in_gpu

        module = parents.get(module, module)

        if module_in_gpu == module:
            return

        if module_in_gpu is not None:
            module_in_gpu.to(cpu)

        module.to(devices.device)
        module_in_gpu = module

    # see below for register_forward_pre_hook;
    # first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is
    # useless here, and we just replace those methods

    first_stage_model = sd_model.first_stage_model
    first_stage_model_encode = sd_model.first_stage_model.encode
    first_stage_model_decode = sd_model.first_stage_model.decode

    def first_stage_model_encode_wrap(x):
        send_me_to_gpu(first_stage_model, None)
        return first_stage_model_encode(x)

    def first_stage_model_decode_wrap(z):
        send_me_to_gpu(first_stage_model, None)
        return first_stage_model_decode(z)

    to_remain_in_cpu = [
        (sd_model, 'first_stage_model'),
        (sd_model, 'depth_model'),
        (sd_model, 'embedder'),
        (sd_model, 'model'),
    ]

    is_sdxl = hasattr(sd_model, 'conditioner')
    is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model')

    if hasattr(sd_model, 'medvram_fields'):
        to_remain_in_cpu = sd_model.medvram_fields()
    elif is_sdxl:
        to_remain_in_cpu.append((sd_model, 'conditioner'))
    elif is_sd2:
        to_remain_in_cpu.append((sd_model.cond_stage_model, 'model'))
    else:
        to_remain_in_cpu.append((sd_model.cond_stage_model, 'transformer'))

    # remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet from the model
    stored = []
    for obj, field in to_remain_in_cpu:
        module = getattr(obj, field, None)
        stored.append(module)
        setattr(obj, field, None)

    # send the model to GPU.
    sd_model.to(devices.device)

    # put modules back. the modules will be on the CPU.
    for (obj, field), module in zip(to_remain_in_cpu, stored):
        setattr(obj, field, module)
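    # because the big modules were detached while sd_model.to() ran above, only
    # the small remainder of the model actually ended up on the GPU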

    # register hooks for the first three models
    if hasattr(sd_model, "cond_stage_model") and hasattr(sd_model.cond_stage_model, "medvram_modules"):
        for module in sd_model.cond_stage_model.medvram_modules():
            if isinstance(module, ModuleWithParent):
                parent = module.parent
                module = module.module
            else:
                parent = None

            if module:
                module.register_forward_pre_hook(send_me_to_gpu)

                if parent:
                    parents[module] = parent

    elif is_sdxl:
        sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu)
    elif is_sd2:
        sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu)
        sd_model.cond_stage_model.model.token_embedding.register_forward_pre_hook(send_me_to_gpu)
        parents[sd_model.cond_stage_model.model] = sd_model.cond_stage_model
        parents[sd_model.cond_stage_model.model.token_embedding] = sd_model.cond_stage_model
    else:
        sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
        parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
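
    # entries in `parents` make the hook move the entire cond_stage_model between
    # devices rather than just the hooked submodule (see parents.get() in send_me_to_gpu)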

    sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
    sd_model.first_stage_model.encode = first_stage_model_encode_wrap
    sd_model.first_stage_model.decode = first_stage_model_decode_wrap
    if getattr(sd_model, 'depth_model', None) is not None:
        sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
    if getattr(sd_model, 'embedder', None) is not None:
        sd_model.embedder.register_forward_pre_hook(send_me_to_gpu)

    if use_medvram:
        sd_model.model.register_forward_pre_hook(send_me_to_gpu)
    else:
        diff_model = sd_model.model.diffusion_model

        # the third remaining model is still too big for 4 GB, so we also do the same for its submodules
        # so that only one of them is on the GPU at a time
        stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
        sd_model.model.to(devices.device)
        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored

        # install hooks for bits of the third model
        diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu)
        for block in diff_model.input_blocks:
            block.register_forward_pre_hook(send_me_to_gpu)
        diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu)
        for block in diff_model.output_blocks:
            block.register_forward_pre_hook(send_me_to_gpu)


def is_enabled(sd_model):
    return sd_model.lowvram
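
The pattern everything above relies on (a forward pre-hook that pulls a module onto the GPU and evicts whichever tracked module held it before) can be tried in isolation. Below is a minimal, self-contained sketch on a toy model; install_offload_hooks and the choice of hooked parts are illustrative names, not webui API.

import torch
import torch.nn as nn


def install_offload_hooks(parts, compute, storage):
    """Park every module in `parts` on `storage`; a forward pre-hook moves each
    one to `compute` just before it runs and evicts the previous occupant."""
    state = {"on_compute": None}

    def swap_in(module, _inputs):
        if state["on_compute"] is module:
            return
        if state["on_compute"] is not None:
            state["on_compute"].to(storage)
        module.to(compute)
        state["on_compute"] = module

    for part in parts:
        part.to(storage)
        part.register_forward_pre_hook(swap_in)


model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 1))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
install_offload_hooks([model[0], model[2]], compute=device, storage=torch.device("cpu"))
y = model(torch.ones(1, 8, device=device))  # each Linear sits on `device` only during its own forward

At most one hooked part occupies the compute device at any time, which is the same invariant send_me_to_gpu maintains for the full checkpoint.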