This notebook brings together the code samples shown in the video below, which is part of the Hugging Face course.
In [ ]:
#@title
from IPython.display import HTML

HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/L-WSwUWde1U?rel=0&controls=0&showinfo=0" frameborder="0" allowfullscreen></iframe>')
Install the Transformers and Datasets libraries to run this notebook.
In [ ]:
! pip install datasets transformers[sentencepiece]
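Depending on your library versions, load_metric may be deprecated or missing from datasets; in that case the same GLUE metric can be loaded from the separate evaluate package (a small sketch, assuming evaluate is installed, e.g. via pip install evaluate):

import evaluate

metric = evaluate.load("glue", "mnli")  # drop-in replacement for load_metric("glue", "mnli")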
In [ ]:
import numpy as np

from datasets import load_dataset, load_metric
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

raw_datasets = load_dataset("glue", "mnli")

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)


tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

args = TrainingArguments(
    "distilbert-finetuned-mnli",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
)

metric = load_metric("glue", "mnli")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)


trainer = Trainer(
    model,
    args,
    train_dataset=raw_datasets["train"],
    eval_dataset=raw_datasets["validation_matched"],
    compute_metrics=compute_metrics,
)
trainer.train()
In [ ]:
trainer.train_dataset[0]
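Inspecting the first training example is the quickest way to see what went wrong above: the Trainer was given raw_datasets rather than tokenized_datasets, so the example only contains the raw text columns and no input_ids for the model. A minimal sanity-check sketch (the expected key names are an assumption based on what a DistilBERT tokenizer produces):

# Hypothetical check: every example the Trainer sees should already be tokenized.
expected_keys = {"input_ids", "attention_mask", "label"}
missing = expected_keys - set(trainer.train_dataset[0].keys())
if missing:
    print(f"Missing keys {missing} -- the dataset passed to the Trainer was not tokenized.")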
In [ ]:
import numpy as np

from datasets import load_dataset, load_metric
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

raw_datasets = load_dataset("glue", "mnli")

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)


tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

args = TrainingArguments(
    "distilbert-finetuned-mnli",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
)

metric = load_metric("glue", "mnli")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)


# Fix 1: the Trainer now receives the tokenized datasets instead of the raw ones.
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation_matched"],
    compute_metrics=compute_metrics,
)
trainer.train()
In [ ]:
for batch in trainer.get_train_dataloader():
    break
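Pulling a batch out of the training dataloader fails here because the tokenized examples all have different lengths and nothing pads them into a rectangular batch. Passing tokenizer=tokenizer to the Trainer (as in the next cell) makes it fall back to a padding data collator; the explicit equivalent would look roughly like this sketch:

# Sketch: supply a collator that pads each batch to its longest sequence.
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation_matched"],
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)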
In [ ]:
import numpy as np

from datasets import load_dataset, load_metric
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

raw_datasets = load_dataset("glue", "mnli")

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)


tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

args = TrainingArguments(
    "distilbert-finetuned-mnli",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
)

metric = load_metric("glue", "mnli")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)


# Fix 2: passing the tokenizer gives the Trainer a padding data collator by default.
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation_matched"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()
In [ ]:
# Grab one batch and run a forward pass on CPU to get a readable error message.
for batch in trainer.get_train_dataloader():
    break

outputs = trainer.model.cpu()(**batch)
In [ ]:
model.num_labels
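model.num_labels reveals the next problem: the checkpoint was loaded with the default two-label classification head, while MNLI has three classes (entailment, neutral, contradiction), so the loss computation on the batch above goes out of range. A small hedged check comparing the head against the dataset's label feature:

# Sketch: the number of classes in the dataset should match the model head.
num_classes = raw_datasets["train"].features["label"].num_classes
print(model.num_labels, num_classes)  # 2 vs. 3 for MNLI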
In [ ]:
import numpy as np

from datasets import load_dataset, load_metric
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

raw_datasets = load_dataset("glue", "mnli")

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)


tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
# Fix 3: MNLI has three classes, so create the classification head with num_labels=3.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=3
)

args = TrainingArguments(
    "distilbert-finetuned-mnli",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
)

metric = load_metric("glue", "mnli")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)


trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation_matched"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()
In [ ]:
for batch in trainer.get_train_dataloader():
    break

outputs = trainer.model.cpu()(**batch)
loss = outputs.loss
loss.backward()
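Once the forward and backward pass run without errors, a natural follow-up (a sketch, not part of the original notebook) is to verify that every trainable parameter actually received a finite gradient:

import torch

# Sketch: flag any trainable parameter with a missing or non-finite gradient.
for name, param in trainer.model.named_parameters():
    if param.requires_grad:
        if param.grad is None or not torch.isfinite(param.grad).all():
            print(f"Problem with the gradient of {name}")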
In [ ]:
trainer.create_optimizer()
trainer.optimizer.step()
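After the optimizer step succeeds, a common last debugging step is to try overfitting the model on this single batch; if the loss does not drop towards zero, something is still wrong in the pipeline. A minimal sketch reusing the batch and optimizer from the cells above:

# Sketch: repeatedly train on one batch; the loss should fall close to zero.
trainer.optimizer.zero_grad()
for _ in range(20):
    outputs = trainer.model(**batch)
    outputs.loss.backward()
    trainer.optimizer.step()
    trainer.optimizer.zero_grad()
print(outputs.loss.item())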