Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/main/course/vi/chapter8/section4_tf.ipynb
Views: 2546
Kernel: Unknown Kernel
Gỡ lỗi quy trình huấn luyện
Install the Transformers, Datasets, and Evaluate libraries to run this notebook.
In [ ]:
In [ ]:
ValueError: No gradients provided for any variable: ['tf_distil_bert_for_sequence_classification/distilbert/embeddings/word_embeddings/weight:0', '...']
In [ ]:
{'attention_mask': <tf.Tensor: shape=(16, 76), dtype=int64, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
[1, 1, 1, ..., 0, 0, 0],
[1, 1, 1, ..., 0, 0, 0],
...,
[1, 1, 1, ..., 1, 1, 1],
[1, 1, 1, ..., 0, 0, 0],
[1, 1, 1, ..., 0, 0, 0]])>,
'label': <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 2, 1, 2, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 2, 1])>,
'input_ids': <tf.Tensor: shape=(16, 76), dtype=int64, numpy=
array([[ 101, 2174, 1010, ..., 0, 0, 0],
[ 101, 3174, 2420, ..., 0, 0, 0],
[ 101, 2044, 2048, ..., 0, 0, 0],
...,
[ 101, 3398, 3398, ..., 2051, 2894, 102],
[ 101, 1996, 4124, ..., 0, 0, 0],
[ 101, 1999, 2070, ..., 0, 0, 0]])>}
In [ ]:
246/24543 [..............................] - ETA: 15:52 - loss: nan
In [ ]:
TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(16,), dtype=float32, numpy=
array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan], dtype=float32)>, logits=<tf.Tensor: shape=(16, 2), dtype=float32, numpy=
array([[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan]], dtype=float32)>, hidden_states=None, attentions=None)
In [ ]:
TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(16,), dtype=float32, numpy=
array([0.6844486 , nan, nan, 0.67127866, 0.7068601 ,
nan, 0.69309855, nan, 0.65531296, nan,
nan, nan, 0.675402 , nan, nan,
0.69831556], dtype=float32)>, logits=<tf.Tensor: shape=(16, 2), dtype=float32, numpy=
array([[-0.04761693, -0.06509043],
[-0.0481936 , -0.04556257],
[-0.0040929 , -0.05848458],
[-0.02417453, -0.0684005 ],
[-0.02517801, -0.05241832],
[-0.04514256, -0.0757378 ],
[-0.02656011, -0.02646275],
[ 0.00766164, -0.04350497],
[ 0.02060014, -0.05655622],
[-0.02615328, -0.0447021 ],
[-0.05119278, -0.06928903],
[-0.02859691, -0.04879177],
[-0.02210129, -0.05791225],
[-0.02363213, -0.05962167],
[-0.05352269, -0.0481673 ],
[-0.08141848, -0.07110836]], dtype=float32)>, hidden_states=None, attentions=None)
In [ ]:
array([ 1, 2, 5, 7, 9, 10, 11, 13, 14])
In [ ]:
array([[ 101, 2007, 2032, 2001, 1037, 16480, 3917, 2594, 4135,
23212, 3070, 2214, 10170, 1010, 2012, 4356, 1997, 3183,
6838, 12953, 2039, 2000, 1996, 6147, 1997, 2010, 2606,
1012, 102, 6838, 2001, 3294, 6625, 3773, 1996, 2214,
2158, 1012, 102, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 1998, 6814, 2016, 2234, 2461, 2153, 1998, 13322,
2009, 1012, 102, 2045, 1005, 1055, 2053, 3382, 2008,
2016, 1005, 2222, 3046, 8103, 2075, 2009, 2153, 1012,
102, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 1998, 2007, 1996, 3712, 4634, 1010, 2057, 8108,
2025, 3404, 2028, 1012, 1996, 2616, 18449, 2125, 1999,
1037, 9666, 1997, 4100, 8663, 11020, 6313, 2791, 1998,
2431, 1011, 4301, 1012, 102, 2028, 1005, 1055, 5177,
2110, 1998, 3977, 2000, 2832, 2106, 2025, 2689, 2104,
2122, 6214, 1012, 102, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 1045, 2001, 1999, 1037, 13090, 5948, 2007, 2048,
2308, 2006, 2026, 5001, 2043, 2026, 2171, 2001, 2170,
1012, 102, 1045, 2001, 3564, 1999, 2277, 1012, 102,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 2195, 4279, 2191, 2039, 1996, 2181, 2124, 2004,
1996, 2225, 7363, 1012, 102, 2045, 2003, 2069, 2028,
2451, 1999, 1996, 2225, 7363, 1012, 102, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 2061, 2008, 1045, 2123, 1005, 1056, 2113, 2065,
2009, 2428, 10654, 7347, 2030, 2009, 7126, 2256, 2495,
2291, 102, 2009, 2003, 5094, 2256, 2495, 2291, 2035,
2105, 1012, 102, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 2051, 1010, 2029, 3216, 2019, 2503, 3444, 1010,
6732, 1996, 2265, 2038, 19840, 2098, 2125, 9906, 1998,
2003, 2770, 2041, 1997, 4784, 1012, 102, 2051, 6732,
1996, 2265, 2003, 9525, 1998, 4569, 1012, 102, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 1996, 10556, 2140, 11515, 2058, 1010, 2010, 2162,
2252, 5689, 2013, 2010, 7223, 1012, 102, 2043, 1996,
10556, 2140, 11515, 2058, 1010, 2010, 2252, 3062, 2000,
1996, 2598, 1012, 102, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[ 101, 13543, 1999, 2049, 6143, 2933, 2443, 102, 2025,
13543, 1999, 6143, 2933, 2003, 2443, 102, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0]])
In [ ]:
2
In [ ]:
In [ ]:
319/24543 [..............................] - ETA: 16:07 - loss: 0.9718
In [ ]:
In [ ]:
In [ ]: