GitHub Repository: huggingface/notebooks
Path: blob/main/course/vi/chapter8/section4_tf.ipynb

Debugging the training pipeline

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

!pip install datasets evaluate transformers[sentencepiece]
from datasets import load_dataset
import evaluate
from transformers import (
    AutoTokenizer,
    TFAutoModelForSequenceClassification,
)

raw_datasets = load_dataset("glue", "mnli")

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)


tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

train_dataset = tokenized_datasets["train"].to_tf_dataset(
    columns=["input_ids", "labels"], batch_size=16, shuffle=True
)

validation_dataset = tokenized_datasets["validation_matched"].to_tf_dataset(
    columns=["input_ids", "labels"], batch_size=16, shuffle=True
)

model = TFAutoModelForSequenceClassification.from_pretrained(model_checkpoint)

model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

model.fit(train_dataset)
ValueError: No gradients provided for any variable: ['tf_distil_bert_for_sequence_classification/distilbert/embeddings/word_embeddings/weight:0', '...']
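This error almost always means that no loss was ever computed, so Keras had no gradients to apply. Before re-reading the whole script, it is worth checking what the tf.data pipeline actually yields. A minimal check, as a sketch using the standard element_spec attribute of tf.data datasets:

# Sketch: inspect what Keras will receive from the pipeline. If this is a
# single dict with no separate labels tensor, any loss passed to compile()
# has no targets to compare against -- hence "No gradients provided".
print(train_dataset.element_spec)

Another way to look at the same thing is to grab one batch directly: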
for batch in train_dataset:
    break
{'attention_mask': <tf.Tensor: shape=(16, 76), dtype=int64, numpy=
 array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]])>,
 'label': <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 2, 1, 2, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 2, 1])>,
 'input_ids': <tf.Tensor: shape=(16, 76), dtype=int64, numpy=
 array([[ 101, 2174, 1010, ...,    0,    0,    0],
        [ 101, 3174, 2420, ...,    0,    0,    0],
        [ 101, 2044, 2048, ...,    0,    0,    0],
        ...,
        [ 101, 3398, 3398, ..., 2051, 2894,  102],
        [ 101, 1996, 4124, ...,    0,    0,    0],
        [ 101, 1999, 2070, ...,    0,    0,    0]])>}
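The labels are buried inside the input dict under the key 'label'; they were never handed to Keras as separate targets, so the string loss we passed to compile() had nothing to train against. Transformers models can compute their loss internally whenever labels are part of the inputs, which is what the next cell relies on by compiling without an external loss. An alternative fix, sketched below under the assumption of the tokenized MNLI dataset built above, is to make to_tf_dataset emit (features, labels) pairs via its label_cols argument; since the model outputs logits, the external loss then needs from_logits=True:

from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Sketch of the alternative: yield (features, labels) pairs so an external
# Keras loss has real targets to work with.
train_dataset = tokenized_datasets["train"].to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["label"],
    batch_size=16,
    shuffle=True,
)
model.compile(
    loss=SparseCategoricalCrossentropy(from_logits=True),  # the model outputs logits
    optimizer="adam",
)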
model.compile(optimizer="adam")
246/24543 [..............................] - ETA: 15:52 - loss: nan
model(batch)
TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(16,), dtype=float32, numpy=
array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan], dtype=float32)>, logits=<tf.Tensor: shape=(16, 2), dtype=float32, numpy=
array([[nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan],
       [nan, nan]], dtype=float32)>, hidden_states=None, attentions=None)
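Every logit is nan, not just the losses. That is because a single optimizer step with a nan gradient is enough to overwrite every weight with nan, so the model we just trained is beyond recovery and we need fresh weights before investigating further (which the next cell does). A quick way to confirm the damage lives in the weights themselves, as a sketch using the standard Keras get_weights() method:

import numpy as np

# Sketch: once a nan gradient has been applied, the nans are in the weights,
# so any later forward pass is nan regardless of the input.
print(any(np.isnan(w).any() for w in model.get_weights()))  # True for this model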
model = TFAutoModelForSequenceClassification.from_pretrained(model_checkpoint)
model(batch)
TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(16,), dtype=float32, numpy=
array([0.6844486 ,        nan,        nan, 0.67127866, 0.7068601 ,
              nan, 0.69309855,        nan, 0.65531296,        nan,
              nan,        nan, 0.675402  ,        nan,        nan,
       0.69831556], dtype=float32)>, logits=<tf.Tensor: shape=(16, 2), dtype=float32, numpy=
array([[-0.04761693, -0.06509043],
       [-0.0481936 , -0.04556257],
       [-0.0040929 , -0.05848458],
       [-0.02417453, -0.0684005 ],
       [-0.02517801, -0.05241832],
       [-0.04514256, -0.0757378 ],
       [-0.02656011, -0.02646275],
       [ 0.00766164, -0.04350497],
       [ 0.02060014, -0.05655622],
       [-0.02615328, -0.0447021 ],
       [-0.05119278, -0.06928903],
       [-0.02859691, -0.04879177],
       [-0.02210129, -0.05791225],
       [-0.02363213, -0.05962167],
       [-0.05352269, -0.0481673 ],
       [-0.08141848, -0.07110836]], dtype=float32)>, hidden_states=None, attentions=None)
import numpy as np

loss = model(batch).loss.numpy()
indices = np.flatnonzero(np.isnan(loss))
indices
array([ 1, 2, 5, 7, 9, 10, 11, 13, 14])
input_ids = batch["input_ids"].numpy()
input_ids[indices]
array([[ 101, 2007, 2032, 2001, 1037, 16480, 3917, 2594, 4135, 23212, 3070, 2214, 10170, 1010, 2012, 4356, 1997, 3183, 6838, 12953, 2039, 2000, 1996, 6147, 1997, 2010, 2606, 1012, 102, 6838, 2001, 3294, 6625, 3773, 1996, 2214, 2158, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 1998, 6814, 2016, 2234, 2461, 2153, 1998, 13322, 2009, 1012, 102, 2045, 1005, 1055, 2053, 3382, 2008, 2016, 1005, 2222, 3046, 8103, 2075, 2009, 2153, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 1998, 2007, 1996, 3712, 4634, 1010, 2057, 8108, 2025, 3404, 2028, 1012, 1996, 2616, 18449, 2125, 1999, 1037, 9666, 1997, 4100, 8663, 11020, 6313, 2791, 1998, 2431, 1011, 4301, 1012, 102, 2028, 1005, 1055, 5177, 2110, 1998, 3977, 2000, 2832, 2106, 2025, 2689, 2104, 2122, 6214, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 1045, 2001, 1999, 1037, 13090, 5948, 2007, 2048, 2308, 2006, 2026, 5001, 2043, 2026, 2171, 2001, 2170, 1012, 102, 1045, 2001, 3564, 1999, 2277, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 2195, 4279, 2191, 2039, 1996, 2181, 2124, 2004, 1996, 2225, 7363, 1012, 102, 2045, 2003, 2069, 2028, 2451, 1999, 1996, 2225, 7363, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 2061, 2008, 1045, 2123, 1005, 1056, 2113, 2065, 2009, 2428, 10654, 7347, 2030, 2009, 7126, 2256, 2495, 2291, 102, 2009, 2003, 5094, 2256, 2495, 2291, 2035, 2105, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 2051, 1010, 2029, 3216, 2019, 2503, 3444, 1010, 6732, 1996, 2265, 2038, 19840, 2098, 2125, 9906, 1998, 2003, 2770, 2041, 1997, 4784, 1012, 102, 2051, 6732, 1996, 2265, 2003, 9525, 1998, 4569, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 1996, 10556, 2140, 11515, 2058, 1010, 2010, 2162, 2252, 5689, 2013, 2010, 7223, 1012, 102, 2043, 1996, 10556, 2140, 11515, 2058, 1010, 2010, 2252, 3062, 2000, 1996, 2598, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 101, 13543, 1999, 2049, 6143, 2933, 2443, 102, 2025, 13543, 1999, 6143, 2933, 2003, 2443, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
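Nothing about these inputs looks unusual. The other thing worth checking for the same rows is the labels; a sketch (note that in this batch the key is 'label', not 'labels'):

labels = batch["label"].numpy()
labels[indices]  # every nan row turns out to carry label 2

With all the nan samples sharing label 2, the natural next question is how many output classes the model's classification head actually has: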
model.config.num_labels
2
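So the model has only two output classes, while MNLI labels run from 0 to 2 (entailment, neutral, contradiction). An out-of-range class index is exactly the kind of bug that produces nan: on GPU, sparse cross-entropy silently returns nan for it (on CPU it may raise an error instead). A minimal reproduction, as a sketch:

import tensorflow as tf

# Sketch: a class index outside [0, num_classes) breaks sparse crossentropy.
# On GPU this yields nan silently; on CPU it may raise an error instead.
logits = tf.random.normal((1, 2))  # a 2-class head, like ours
bad_label = tf.constant([2])       # but MNLI labels go up to 2
print(tf.keras.losses.sparse_categorical_crossentropy(bad_label, logits, from_logits=True))

The fix is to give the head one output per class, which the next cell does while also lowering the learning rate.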
from tensorflow.keras.optimizers import Adam

# Reload with a 3-way head to match the MNLI labels, and use a learning rate
# suited to fine-tuning: Adam's default of 1e-3 is far too high here.
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=3
)
model.compile(optimizer=Adam(5e-5))
model.fit(train_dataset)
319/24543 [..............................] - ETA: 16:07 - loss: 0.9718
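The loss is now finite and going down. A fixed small learning rate works, but a decaying schedule is typical for fine-tuning; a sketch using the create_optimizer helper from Transformers (the 3-epoch count is an assumption for illustration):

from transformers import create_optimizer

num_epochs = 3  # assumption for this sketch
num_train_steps = len(train_dataset) * num_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=5e-5, num_warmup_steps=0, num_train_steps=num_train_steps
)
model.compile(optimizer=optimizer)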
If training runs but the results are disappointing, go back to the data: decode a sample to check that it says what you expect, then look at its label.

input_ids = batch["input_ids"].numpy()
tokenizer.decode(input_ids[0])
labels = batch["labels"].numpy()
label = labels[0]
A classic sanity check is to try overfitting a single batch: if the model cannot drive the loss towards zero on data it sees twenty times, something is wrong with the setup rather than the tuning.

for batch in train_dataset:
    break

# Make sure you've run model.compile() and set your optimizer,
# and your loss/metrics if you're using them
model.fit(batch, epochs=20)
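After that loop, the model should predict this batch almost perfectly. A sketch of the follow-up check (the label key may be 'label' or 'labels' depending on how the dataset was built):

preds = model(batch).logits.numpy().argmax(axis=-1)
print((preds == batch["labels"].numpy()).mean())  # should be close to 1.0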