"Utility functions for memory management"
from ..imports.torch import *
from ..core import *
from ..script import *
import functools, threading, time
from .pynvml_gate import *
from collections import namedtuple
use_gpu = torch.cuda.is_available()
GPUMemory = namedtuple('GPUMemory', ['total', 'free', 'used'])
if use_gpu:
    pynvml = load_pynvml_env()

def preload_pytorch():
    "Run a tiny op on the GPU to force pytorch to set up its CUDA context, which consumes GPU RAM on first use."
    torch.ones((1, 1)).cuda()
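
# Illustrative usage (assumes a CUDA device is present): call this once before
# taking baseline measurements, so the one-time CUDA context allocation isn't
# attributed to your own code:
#
#   preload_pytorch()
#   print(gpu_mem_get_used())  # now already includes the context overhead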

def b2mb(num):
    "Convert bytes to megabytes (2**20 bytes), rounding down."
    return int(num/2**20)
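
# For example, b2mb(2**20 + 1) returns 1, since fractional MBs are truncated.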

def gpu_mem_get(id=None):
    "Get total, used and free memory (in MBs) for gpu `id`. If `id` is not passed, the currently selected torch device is used."
    if not use_gpu: return GPUMemory(0, 0, 0)
    if id is None: id = torch.cuda.current_device()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(id)
        info   = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return GPUMemory(*(map(b2mb, [info.total, info.free, info.used])))
    except pynvml.NVMLError:
        return GPUMemory(0, 0, 0)
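
# Illustrative usage (assumes at least one visible CUDA device):
#
#   mem = gpu_mem_get(0)          # GPUMemory(total=..., free=..., used=...)
#   print(f"gpu0: {mem.free}MB free of {mem.total}MB")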

def gpu_mem_get_all():
    "Get total, used and free memory (in MBs) for each available gpu."
    if not use_gpu: return []
    return list(map(gpu_mem_get, range(pynvml.nvmlDeviceGetCount())))

def gpu_mem_get_free():
    "Get free memory (in MBs) for the currently selected gpu id, w/o emptying the cache."
    return gpu_mem_get().free

def gpu_mem_get_free_no_cache():
    "Get free memory (in MBs) for the currently selected gpu id, after emptying the cache."
    torch.cuda.empty_cache()
    return gpu_mem_get().free
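
# pytorch's caching allocator holds on to freed blocks, so the two readings can
# differ substantially. A quick way to see the cache's footprint (illustrative):
#
#   before = gpu_mem_get_free()
#   after  = gpu_mem_get_free_no_cache()
#   print(f"~{after - before}MB was held by the allocator cache")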

def gpu_mem_get_used():
    "Get used memory (in MBs) for the currently selected gpu id, w/o emptying the cache."
    return gpu_mem_get().used

def gpu_mem_get_used_fast(gpu_handle):
    "Get used memory (in MBs) for the gpu behind `gpu_handle`, w/o emptying the cache. Fast, because it skips the per-call handle lookup."
    info = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
    return b2mb(info.used)
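
# Illustrative usage: resolve the handle once, then poll cheaply in a loop
# (this is exactly what GPUMemTrace's peak monitor below does):
#
#   handle = pynvml.nvmlDeviceGetHandleByIndex(torch.cuda.current_device())
#   for _ in range(100): used = gpu_mem_get_used_fast(handle)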

def gpu_mem_get_used_no_cache():
    "Get used memory (in MBs) for the currently selected gpu id, after emptying the cache."
    torch.cuda.empty_cache()
    return gpu_mem_get().used

def gpu_with_max_free_mem():
    "Get (gpu_id, free_ram) for the gpu with the most free RAM (the first one wins on ties)."
    mem_all = gpu_mem_get_all()
    if not len(mem_all): return None, 0
    free_all = np.array([x.free for x in mem_all])
    id = np.argmax(free_all)
    return id, free_all[id]
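
# Illustrative usage: pick the least-busy gpu before allocating anything big
# (torch.cuda.set_device is standard pytorch API; the policy itself is up to you):
#
#   id, free = gpu_with_max_free_mem()
#   if id is not None: torch.cuda.set_device(id)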

class GPUMemTrace:
    "Trace allocated and peaked GPU memory usage (deltas)."
    def __init__(self, silent=False, ctx=None, on_exit_report=True):
        assert torch.cuda.is_available(), "pytorch CUDA is required"
        self.silent         = silent
        self.ctx            = ctx
        self.on_exit_report = on_exit_report
        self.start()

    def reset(self):
        self.used_start = gpu_mem_get_used_no_cache()
        self.used_peak  = self.used_start

    def data_set(self):
        # delta_used:   memory still allocated relative to the starting point
        # delta_peaked: transient overhead above that, i.e. the peak minus
        #               whatever remained allocated (never negative)
        self.delta_used   = gpu_mem_get_used_no_cache() - self.used_start
        self.delta_peaked = self.used_peak - self.used_start
        if   self.delta_peaked < 0: self.delta_peaked = 0
        elif self.delta_used   > 0: self.delta_peaked -= self.delta_used

    def data(self):
        if self.is_running: self.data_set()
        return self.delta_used, self.delta_peaked

    def start(self):
        self.is_running = True
        self.reset()
        self.peak_monitor_start()

    def stop(self):
        self.peak_monitor_stop()
        self.data_set()
        self.is_running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *exc):
        self.stop()
        if self.on_exit_report: self.report('exit')

    def __del__(self):
        self.stop()

    def __repr__(self):
        delta_used, delta_peaked = self.data()
        return f"△Used Peaked MB: {delta_used:6,.0f} {delta_peaked:6,.0f}"

    def _get_ctx(self, subctx=None):
        "Return ' (ctx: subctx)', ' (ctx)', ' (subctx)' or '', depending on what was set here and in the constructor."
        l = []
        if self.ctx is not None: l.append(self.ctx)
        if subctx   is not None: l.append(subctx)
        return '' if len(l) == 0 else f" ({': '.join(l)})"

    def set_silent(self, silent=True):
        # named `set_silent` rather than `silent`: `__init__` stores the flag in
        # `self.silent`, and an identically named method would be shadowed by
        # that instance attribute and thus be uncallable
        self.silent = silent

    def report(self, subctx=None):
        "Print the used and peaked deltas, plus an optional context note, which can also be preset in the constructor."
        if self.silent: return
        print(f"{self}{self._get_ctx(subctx)}")

    def report_n_reset(self, subctx=None):
        "Print the used and peaked deltas and an optional context note, then reset the counters."
        self.report(subctx)
        self.reset()

    def peak_monitor_start(self):
        self.peak_monitoring = True
        # daemon thread, so a forgotten trace can't keep the process alive
        peak_monitor_thread = threading.Thread(target=self.peak_monitor_func)
        peak_monitor_thread.daemon = True
        peak_monitor_thread.start()

    def peak_monitor_stop(self):
        self.peak_monitoring = False

    def peak_monitor_func(self):
        # poll nvml ~1000x/sec, keeping the highest used-memory reading seen;
        # the handle is resolved once so each iteration stays cheap
        gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(torch.cuda.current_device())
        while True:
            self.used_peak = max(gpu_mem_get_used_fast(gpu_handle), self.used_peak)
            if not self.peak_monitoring: break
            time.sleep(0.001)
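
# Illustrative usage (`x` is a hypothetical tensor; any CUDA work will do):
#
#   with GPUMemTrace(ctx="fit"):            # prints "△Used Peaked MB: ... (fit: exit)"
#       x = torch.randn(1024, 1024, device="cuda")
#
#   mtrace = GPUMemTrace()                  # or drive it manually
#   x = x @ x
#   mtrace.report_n_reset("matmul")
#   mtrace.stop()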

def gpu_mem_trace(func):
    "A decorator that runs `func` under `GPUMemTrace` and reports the deltas on exit."
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with GPUMemTrace(ctx=func.__qualname__, on_exit_report=True):
            return func(*args, **kwargs)
    return wrapper
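
# Illustrative usage (`train_one_epoch` is a hypothetical function):
#
#   @gpu_mem_trace
#   def train_one_epoch(model, data): ...
#
# Each call then prints e.g. "△Used Peaked MB:     10    270 (train_one_epoch: exit)".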

def reduce_mem_usage(df):
    "Iterate through the columns of `df`, downcasting each to the smallest dtype that holds its values, to reduce memory usage."
    start_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage of dataframe is {start_mem:.2f} MB')
    for col in df.columns:
        col_type = df[col].dtype
        if str(col_type) != 'category' and col_type != 'datetime64[ns]' and col_type != bool:
            if col_type != object:
                c_min = df[col].min()
                c_max = df[col].max()
                if str(col_type)[:3] == 'int':
                    if   c_min > np.iinfo(np.int8).min  and c_max < np.iinfo(np.int8).max:  df[col] = df[col].astype(np.int8)
                    elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: df[col] = df[col].astype(np.int16)
                    elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max: df[col] = df[col].astype(np.int32)
                    elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max: df[col] = df[col].astype(np.int64)
                else:
                    if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                        df[col] = df[col].astype(np.float32)
                    else:
                        print(f'Column {col} does not fit in float32, leaving as float64.')
            else:
                # object (string) columns become pandas categoricals
                df[col] = df[col].astype('category')
    end_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage after optimization is: {end_mem:.2f} MB')
    print(f'Decreased by {100 * (start_mem - end_mem) / start_mem:.1f}%')
    return df
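
# Illustrative usage (assumes pandas is importable; the column values are made up):
#
#   import pandas as pd
#   df = pd.DataFrame({'a': [1, 2, 3], 'b': [0.5, 1.5, 2.5], 'c': ['x', 'y', 'x']})
#   df = reduce_mem_usage(df)   # 'a' -> int8, 'b' -> float32, 'c' -> category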