GitHub Repository: jantic/deoldify
Path: blob/master/fastai/utils/mem.py
"Utility functions for memory management"

from ..imports.torch import *
from ..core import *
from ..script import *
import functools, threading, time
from .pynvml_gate import *
from collections import namedtuple

#is_osx = platform.system() == "Darwin"
use_gpu = torch.cuda.is_available()

GPUMemory = namedtuple('GPUMemory', ['total', 'free', 'used'])

if use_gpu:
    pynvml = load_pynvml_env()

def preload_pytorch():
    torch.ones((1, 1)).cuda()

def b2mb(num):
    """ convert Bs to MBs and round down """
    return int(num/2**20)
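# For reference: b2mb(2**20) == 1 and b2mb(2**30) == 1024; anything under 1MiB
# rounds down to 0, e.g. b2mb(1000) == 0.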

def gpu_mem_get(id=None):
    "get total, used and free memory (in MBs) for gpu `id`. if `id` is not passed, currently selected torch device is used"
    if not use_gpu: return GPUMemory(0, 0, 0)
    if id is None: id = torch.cuda.current_device()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(id)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return GPUMemory(*(map(b2mb, [info.total, info.free, info.used])))
    except:
        return GPUMemory(0, 0, 0)

def gpu_mem_get_all():
    "get total, used and free memory (in MBs) for each available gpu"
    if not use_gpu: return []
    return list(map(gpu_mem_get, range(pynvml.nvmlDeviceGetCount())))

def gpu_mem_get_free():
    "get free memory (in MBs) for the currently selected gpu id, w/o emptying the cache"
    return gpu_mem_get().free

def gpu_mem_get_free_no_cache():
    "get free memory (in MBs) for the currently selected gpu id, after emptying the cache"
    torch.cuda.empty_cache()
    return gpu_mem_get().free

def gpu_mem_get_used():
    "get used memory (in MBs) for the currently selected gpu id, w/o emptying the cache"
    return gpu_mem_get().used

def gpu_mem_get_used_fast(gpu_handle):
    "get used memory (in MBs) for the currently selected gpu id, w/o emptying the cache, and needing the `gpu_handle` arg"
    info = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
    return b2mb(info.used)

def gpu_mem_get_used_no_cache():
    "get used memory (in MBs) for the currently selected gpu id, after emptying the cache"
    torch.cuda.empty_cache()
    return gpu_mem_get().used

def gpu_with_max_free_mem():
    "get [gpu_id, its_free_ram] for the first gpu with highest available RAM"
    mem_all = gpu_mem_get_all()
    if not len(mem_all): return None, 0
    free_all = np.array([x.free for x in mem_all])
    id = np.argmax(free_all)
    return id, free_all[id]
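
# Usage sketch for the query helpers above (illustrative only; the numbers are made
# up and depend entirely on the hardware and on what is currently allocated).
# On a machine with a CUDA GPU, e.g.:
#
#   from fastai.utils.mem import *
#   gpu_mem_get()            # e.g. GPUMemory(total=8119, free=7537, used=582)
#   gpu_mem_get_all()        # one GPUMemory tuple per visible GPU
#   gpu_with_max_free_mem()  # e.g. (0, 7537), i.e. (gpu id, its free RAM in MBs)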

class GPUMemTrace():
    "Trace allocated and peaked GPU memory usage (deltas)."
    def __init__(self, silent=False, ctx=None, on_exit_report=True):
        assert torch.cuda.is_available(), "pytorch CUDA is required"
        self.silent = silent # shortcut to turn off all reports from constructor
        self.ctx = ctx # default context note in report
        self.on_exit_report = on_exit_report # auto-report on ctx manager exit (default: True)
        self.start()

    def reset(self):
        self.used_start = gpu_mem_get_used_no_cache()
        self.used_peak = self.used_start

    def data_set(self):
        # delta_used is the difference between current used mem and used mem at the start
        self.delta_used = gpu_mem_get_used_no_cache() - self.used_start

        # delta_peaked is the overhead, if any. It is calculated as follows:
        #
        # 1. The difference between the peak memory and the used memory at the
        #    start is measured.
        # 2a. If it is negative, delta_peaked is set to 0.
        # 2b. Otherwise, if delta_used is positive, it is subtracted from delta_peaked.
        # XXX: 2a shouldn't be needed once we have a reliable peak counter
        self.delta_peaked = self.used_peak - self.used_start
        if self.delta_peaked < 0: self.delta_peaked = 0
        elif self.delta_used > 0: self.delta_peaked -= self.delta_used
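        # Worked example (made-up numbers): with used_start=1000, used_peak=1500 and
        # current used memory of 1200, delta_used = 1200-1000 = 200 and
        # delta_peaked = (1500-1000) - 200 = 300, i.e. roughly 300MB were needed only
        # temporarily while the traced code ran.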

    def data(self):
        if self.is_running: self.data_set()
        return self.delta_used, self.delta_peaked

    def start(self):
        self.is_running = True
        self.reset()
        self.peak_monitor_start()

    def stop(self):
        self.peak_monitor_stop()
        self.data_set()
        self.is_running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *exc):
        self.stop()
        if self.on_exit_report: self.report('exit')

    def __del__(self):
        self.stop()

    def __repr__(self):
        delta_used, delta_peaked = self.data()
        return f"△Used Peaked MB: {delta_used:6,.0f} {delta_peaked:6,.0f}"

    def _get_ctx(self, subctx=None):
        "Return ' (ctx: subctx)' or ' (ctx)' or ' (subctx)' or '' depending on this and constructor arguments"
        l = []
        if self.ctx is not None: l.append(self.ctx)
        if subctx is not None: l.append(subctx)
        return '' if len(l) == 0 else f" ({': '.join(l)})"

    # NB: the instance attribute `self.silent` assigned in __init__ shadows this
    # method on instances, so calling `trace.silent(True)` raises a TypeError;
    # assign the attribute directly (`trace.silent = True`) to toggle reporting.
    def silent(self, silent=True):
        self.silent = silent

    def report(self, subctx=None):
        "Print delta used+peaked, and an optional context note, which can also be preset in constructor"
        if self.silent: return
        print(f"{ self.__repr__() }{ self._get_ctx(subctx) }")

    def report_n_reset(self, subctx=None):
        "Print delta used+peaked, and an optional context note. Then reset counters"
        self.report(subctx)
        self.reset()

    def peak_monitor_start(self):
        self.peak_monitoring = True

        # continually sample GPU RAM usage
        peak_monitor_thread = threading.Thread(target=self.peak_monitor_func)
        peak_monitor_thread.daemon = True
        peak_monitor_thread.start()

    def peak_monitor_stop(self):
        self.peak_monitoring = False

    # XXX: this is an unreliable function, since there is no thread priority
    # control and it may not run often enough, or may not run at all
    def peak_monitor_func(self):
        gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(torch.cuda.current_device())
        while True:
            self.used_peak = max(gpu_mem_get_used_fast(gpu_handle), self.used_peak)
            if not self.peak_monitoring: break
            time.sleep(0.001) # 1msec
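
# Usage sketch for GPUMemTrace (illustrative; the numbers and the traced workload are
# made up, and a CUDA-capable machine is required):
#
#   with GPUMemTrace(ctx='fit'):   # prints a report on exit by default
#       do_some_gpu_work()         # hypothetical workload to be traced
#
#   # or drive it manually:
#   mtrace = GPUMemTrace()
#   do_some_gpu_work()
#   mtrace.report('step 1')        # e.g. "△Used Peaked MB:    210    480 (step 1)"
#   mtrace.stop()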

def gpu_mem_trace(func):
    "A decorator that runs `GPUMemTrace` w/ report on func"
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with GPUMemTrace(ctx=func.__qualname__, on_exit_report=True):
            return func(*args, **kwargs)
    return wrapper
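
# Usage sketch for the decorator (illustrative; `train_one_epoch` is a hypothetical
# function, and the reported numbers depend on what it actually allocates):
#
#   @gpu_mem_trace
#   def train_one_epoch():
#       ...                # some GPU work
#
#   train_one_epoch()      # prints e.g. "△Used Peaked MB: ... (train_one_epoch: exit)"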

def reduce_mem_usage(df):
    """ Iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # `.drop('index')` was removed while debugging
    columns = df.columns

    for col in columns:
        col_type = df[col].dtype
        if str(col_type) != 'category' and col_type != 'datetime64[ns]' and col_type != bool:
            if col_type != object:
                c_min = df[col].min()
                c_max = df[col].max()
                if str(col_type)[:3] == 'int':
                    if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)
                else:
                    # if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    #     df[col] = df[col].astype(np.float16)
                    # float16 sometimes caused an error, so it was removed
                    if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                        df[col] = df[col].astype(np.float32)
                    else:
                        print('Column ' + col + ' would need float64; leaving it unchanged.')
            else:
                df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df
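
# Usage sketch for reduce_mem_usage (illustrative; the DataFrame below is made up):
#
#   import pandas as pd
#   df = pd.DataFrame({'a': [1, 2, 3], 'b': [0.5, 1.5, 2.5], 'c': ['x', 'y', 'x']})
#   df = reduce_mem_usage(df)
#   # 'a' is downcast to int8, 'b' to float32, and 'c' becomes a pandas category;
#   # the before/after memory usage is printed.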