Path: blob/master/translate_cache/lora/gpt2.zh.json
{
"<h1>GPT-2 with <a href=\"index.html\">LoRA modules</a></h1>\n<p>Here's <a href=\"experiment.html\">the training code</a> for training a GPT2 model with LoRA on Tiny Shakespeare dataset.</p>\n": "<h1>GPT-2 with <a href=\"index.html\">LoRA modules</a></h1>\n<p>Here's <a href=\"experiment.html\">the training code</a> for training a GPT2 model with LoRA on Tiny Shakespeare dataset.</p>\n",
"<h2>GPT2 Model</h2>\n": "<h2>GPT2 Model</h2>\n",
"<h3>Decoder block</h3>\n": "<h3>Decoder block</h3>\n",
"<h3>Feedforward Network</h3>\n": "<h3>Feedforward Network</h3>\n",
"<h3>Multi-Head Attention</h3>\n": "<h3>Multi-Head Attention</h3>\n",
"<p>Add position embeddings </p>\n": "<p>Add position embeddings </p>\n",
"<p>Apply causal attention </p>\n": "<p>Apply causal attention </p>\n",
"<p>Attention </p>\n": "<p>Attention </p>\n",
"<p>Attention layer </p>\n": "<p>Attention layer </p>\n",
"<p>Attention pre-normalization layer </p>\n": "<p>Attention pre-normalization layer </p>\n",
"<p>Decoder blocks </p>\n": "<p>Decoder blocks </p>\n",
"<p>FFN </p>\n": "<p>FFN </p>\n",
"<p>FFN pre-normalization layer </p>\n": "<p>FFN pre-normalization layer </p>\n",
"<p>Feed-forward network </p>\n": "<p>Feed-forward network </p>\n",
"<p>Final layer norm </p>\n": "<p>Final layer norm </p>\n",
"<p>Final normalization </p>\n": "<p>Final normalization </p>\n",
"<p>Final project </p>\n": "<p>Final project </p>\n",
"<p>Get logits from projection layer </p>\n": "<p>Get logits from projection layer </p>\n",
"<p>Get position embeddings </p>\n": "<p>Get position embeddings </p>\n",
"<p>Get position ids </p>\n": "<p>Get position ids </p>\n",
"<p>Get query, key and value </p>\n": "<p>Get query, key and value </p>\n",
"<p>Get token embeddings </p>\n": "<p>Get token embeddings </p>\n",
"<p>Linear transformation for QKV </p>\n": "<p>Linear transformation for QKV </p>\n",
"<p>Output projection </p>\n": "<p>Output projection </p>\n",
"<p>Projection layer to logit space </p>\n": "<p>Projection layer to logit space </p>\n",
"<p>Reorder to <span translate=no>_^_0_^_</span> </p>\n": "<p>Reorder to <span translate=no>_^_0_^_</span> </p>\n",
"<p>Run through transformer blocks </p>\n": "<p>Run through transformer blocks </p>\n",
"<p>Split last dimension to <span translate=no>_^_0_^_</span> </p>\n": "<p>Split last dimension to <span translate=no>_^_0_^_</span> </p>\n",
"<p>The linear layers and the activation </p>\n": "<p>The linear layers and the activation </p>\n",
"<p>Token and absolute positional embeddings </p>\n": "<p>Token and absolute positional embeddings </p>\n",
"<p>Transform them from shape <span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> </p>\n": "<p>Transform them from shape <span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> </p>\n",
"<ul><li><span translate=no>_^_0_^_</span> has shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> has shape <span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embeddings tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the embeddings tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions </li>\n<li><span translate=no>_^_1_^_</span> is the size of the hidden dimension </li>\n<li><span translate=no>_^_2_^_</span> is the lora rank</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions </li>\n<li><span translate=no>_^_1_^_</span> is the size of the hidden dimension </li>\n<li><span translate=no>_^_2_^_</span> is the lora rank</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> is the number of decoder layers </li>\n<li><span translate=no>_^_3_^_</span> is the number of positional embeddings </li>\n<li><span translate=no>_^_4_^_</span> is the layer norm epsilon </li>\n<li><span translate=no>_^_5_^_</span> is the vocabulary size </li>\n<li><span translate=no>_^_6_^_</span> is the lora rank</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> is the number of decoder layers </li>\n<li><span translate=no>_^_3_^_</span> is the number of positional embeddings </li>\n<li><span translate=no>_^_4_^_</span> is the layer norm epsilon </li>\n<li><span translate=no>_^_5_^_</span> is the vocabulary size </li>\n<li><span translate=no>_^_6_^_</span> is the lora rank</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads </li>\n<li><span translate=no>_^_2_^_</span> is the layer norm epsilon </li>\n<li><span translate=no>_^_3_^_</span> is the lora rank</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads </li>\n<li><span translate=no>_^_2_^_</span> is the layer norm epsilon </li>\n<li><span translate=no>_^_3_^_</span> is the lora rank</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads </li>\n<li><span translate=no>_^_2_^_</span> is the lora rank</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads </li>\n<li><span translate=no>_^_2_^_</span> is the lora rank</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span> is the tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n",
"GPT-2 implementation with LoRA modules": "GPT-2 implementation with LoRA modules",
"GPT-2 with LoRA": "GPT-2 with LoRA"
}