GitHub Repository: jantic/deoldify
Path: blob/master/fastai/launch.py
import os, sys, subprocess, torch
from fastai.script import *

@call_parse
def main(
    gpus:Param("The GPUs to use for distributed training", str)='all',
    script:Param("Script to run", str, opt=False)='',
    args:Param("Args to pass to script", nargs='...', opt=False)=''
):
    "PyTorch distributed training launch helper that spawns multiple distributed processes"
    # Loosely based on torch.distributed.launch
    current_env = os.environ.copy()
    # 'all' -> every visible CUDA device; otherwise a string of device digits, e.g. '01' -> ['0', '1']
    gpus = list(range(torch.cuda.device_count())) if gpus=='all' else list(gpus)
    current_env["WORLD_SIZE"] = str(len(gpus))
    current_env["MASTER_ADDR"] = '127.0.0.1'
    current_env["MASTER_PORT"] = '29500'

    # Spawn one copy of the script per GPU, each with its own RANK and --gpu argument
    processes = []
    for i,gpu in enumerate(gpus):
        current_env["RANK"] = str(i)
        cmd = [sys.executable, "-u", script, f"--gpu={gpu}"] + args
        process = subprocess.Popen(cmd, env=current_env)
        processes.append(process)

    # Wait for every worker process to finish before exiting
    for process in processes: process.wait()
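To start a distributed run, the helper is handed the real training script plus whatever flags that script expects. An example invocation is shown below; train.py and --my-flag are hypothetical placeholders, and the exact module path may differ depending on where the vendored fastai package sits on your Python path:

python -m fastai.launch --gpus=all train.py --my-flag=1

Each child process receives its own --gpu=N argument and a unique RANK, while WORLD_SIZE, MASTER_ADDR and MASTER_PORT are shared through the copied environment.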
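The spawned script only has to honour a small contract: accept the --gpu argument appended by the launcher and initialise torch.distributed from the environment variables it exports. Below is a minimal sketch of such a script; the file name and the training step are assumptions, not part of this repository, and only the process-group setup reflects what launch.py provides.

# hypothetical_train.py -- a stand-in for whatever script launch.py is given
import torch
import torch.distributed as dist
from fastai.script import *

@call_parse
def main(gpu:Param("GPU assigned to this process by launch.py", int)=0):
    "Worker entry point for one process of the distributed run"
    torch.cuda.set_device(gpu)
    # init_method='env://' reads RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT,
    # all of which launch.py placed in this process's environment
    dist.init_process_group(backend='nccl', init_method='env://')
    # ... build the model/Learner and train here (e.g. fastai v1's learn.to_distributed(gpu))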