Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
jantic
GitHub Repository: jantic/deoldify
Path: blob/master/fastai/callbacks/mlflow.py
781 views
1
"A `Callback` that saves tracked metrics and notebook file into MLflow server."
2
from ..torch_core import *
3
from ..callback import *
4
from ..basic_train import Learner, LearnerCallback
5
#This is an optional dependency in fastai. Must install separately.
6
try: import mlflow
7
except: print("To use this tracker, please run 'pip install mlflow'")
8
9
class MLFlowTracker(LearnerCallback):
    "A `TrackerCallback` that tracks the loss and metrics into MLFlow"

    def __init__(self, learn:Learner, exp_name: str, params: dict, nb_path: str, uri: str = "http://localhost:5000"):
        """Store the tracking configuration.

        `exp_name` is the MLflow experiment to log into (created in
        `on_train_begin` if it does not exist), `params` is a dict whose items
        are logged as run parameters, `nb_path` is the local file uploaded as
        an artifact when training ends, and `uri` is passed to `MlflowClient`.
        """
        super().__init__(learn)
        self.learn,self.exp_name,self.params,self.nb_path,self.uri = learn,exp_name,params,nb_path,uri
        # Names are paired positionally with the values assembled in `on_epoch_end`.
        self.metrics_names = ['train_loss', 'valid_loss'] + [o.__name__ for o in learn.metrics]

    def on_train_begin(self, **kwargs: Any) -> None:
        "Prepare MLflow experiment and log params"
        self.client = mlflow.tracking.MlflowClient(self.uri)
        # Reuse the experiment when one with this name exists, otherwise create it.
        exp = self.client.get_experiment_by_name(self.exp_name)
        self.exp_id = self.client.create_experiment(self.exp_name) if exp is None else exp.experiment_id
        run = self.client.create_run(experiment_id=self.exp_id)
        # `self.run` holds the run identifier (not the run object) used by every later client call.
        self.run = run.info.run_uuid
        for k,v in self.params.items():
            self.client.log_param(run_id=self.run, key=k, value=v)

    def on_epoch_end(self, epoch, **kwargs:Any)->None:
        "Send loss and metrics values to MLFlow after each epoch"
        # Nothing to report when either the smoothed loss or the metrics are missing.
        if kwargs['smooth_loss'] is None or kwargs["last_metrics"] is None: return
        # `smooth_loss` is logged under 'train_loss'; `last_metrics` is presumably
        # [valid_loss, *metric values] to line up with `self.metrics_names` -- TODO confirm.
        metrics = [kwargs['smooth_loss']] + kwargs["last_metrics"]
        for name, val in zip(self.metrics_names, metrics):
            # Builtin `float` replaces `np.float`, which was only an alias for it and
            # no longer exists in recent NumPy releases.
            self.client.log_metric(self.run, name, float(val), step=epoch)

    def on_train_end(self, **kwargs: Any) -> None:
        "Store the notebook and stop run"
        try:
            self.client.log_artifact(run_id=self.run, local_path=self.nb_path)
        finally:
            # Terminate the run even if the artifact upload fails (e.g. bad `nb_path`),
            # so it is not left open; the original exception still propagates.
            self.client.set_terminated(run_id=self.run)
37
38