CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
huggingface

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: huggingface/notebooks
Path: blob/main/course/videos/push_to_hub_tf.ipynb
Views: 2542
Kernel: Unknown Kernel

This notebook gathers the code samples from the video below, which is part of the Hugging Face course.

#@title
from IPython.display import HTML

# Markup for the embedded course video, kept in a variable so the display
# call below stays short.
video_iframe = '<iframe width="560" height="315" src="https://www.youtube.com/embed/pUh5cGmNV8Y?rel=0&amp;controls=0&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>'

# Final expression of the cell, so the notebook renders the player.
HTML(video_iframe)

Install the Transformers and Datasets libraries to run this notebook.

! pip install datasets transformers[sentencepiece]
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub up front; later cells push to it.
notebook_login()
# NOTE(review): load_metric is not used by any visible cell, and recent
# `datasets` releases appear to have dropped it — confirm before relying on it.
from datasets import load_dataset, load_metric

# Load the "cola" configuration of the "glue" dataset.
raw_datasets = load_dataset("glue", "cola")

# Show the loaded dataset object.
raw_datasets
from transformers import AutoTokenizer

# Checkpoint name; the same variable is used again when the model is loaded.
model_checkpoint = "bert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
def preprocess_function(examples):
    """Tokenize the "sentence" field of ``examples`` with truncation enabled."""
    sentences = examples["sentence"]
    return tokenizer(sentences, truncation=True)


# batched=True is passed so that map() hands preprocess_function batches of
# examples rather than one example at a time.
tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

# Show the tokenized dataset object.
tokenized_datasets
from transformers import DataCollatorWithPadding

collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors='tf')


def _as_tf_dataset(split, shuffle):
    """Convert one split of ``tokenized_datasets`` with the shared settings."""
    return tokenized_datasets[split].to_tf_dataset(
        columns=['attention_mask', 'input_ids', 'labels', 'token_type_ids'],
        collate_fn=collator,
        batch_size=32,
        shuffle=shuffle,
    )


# Only the training split is shuffled; validation keeps its original order.
train_dataset = _as_tf_dataset('train', True)
validation_dataset = _as_tf_dataset('validation', False)
from transformers import TFAutoModelForSequenceClassification

# Load the sequence-classification model from the same checkpoint name that
# was used for the tokenizer.
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_checkpoint
)
from transformers import AdamWeightDecay

optimizer = AdamWeightDecay(2e-5, weight_decay_rate=0.01)

# Only the optimizer is handed to compile(); no loss argument is given here.
model.compile(optimizer=optimizer)
from transformers import PushToHubCallback

# Callback configured with a local output directory, the tokenizer and the
# target Hub model id.
hub_callback = PushToHubCallback(
    "model_output/",
    tokenizer=tokenizer,
    hub_model_id="bert-fine-tuned-cola",
)
callbacks = [hub_callback]

# Train for two epochs, evaluating on the validation set.
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=2,
    callbacks=callbacks,
)
# Explicit push of the model after training, with its own commit message.
model.push_to_hub(
    "bert-fine-tuned-cola",
    commit_message="End of training",
)

Labels

# Read the class names stored on the "label" feature of the training split.
label_feature = raw_datasets["train"].features["label"]
label_names = label_feature.names

# Show the label names.
label_names
# Store class indices as integers on both mappings. The configuration
# documents id2label as int -> str and label2id as str -> int. With string
# indices, an integer prediction index cannot be looked up in id2label during
# this session, and label2id would hand back strings instead of class ids.
model.config.id2label = dict(enumerate(label_names))
model.config.label2id = {name: index for index, name in enumerate(label_names)}
repo_name = "bert-fine-tuned-cola"

# The push is issued on model.config, so only the configuration object is
# sent to the repository.
model.config.push_to_hub(repo_name)
# Load a model back by its full repository id, given as namespace/repo.
loaded_model = TFAutoModelForSequenceClassification.from_pretrained(
    'Rocketknight1/bert-fine-tuned-cola'
)