CoCalc -- draft.py

GitHub Repository: aamini/introtodeeplearning
Path: blob/master/xtra_labs/llm_finetune/draft.py
⁵⁴⁹ views
1
"""
2
Drafting lab flow in script format using PyTorch
3
"""
4
from datasets import load_dataset
5
import math
6
import numpy as np
7
import pandas as pd
8
import random
9
import torch
10
import torch.nn as nn
11
import torch.nn.functional as F
12
from torch.nn import CrossEntropyLoss
13
from torch.optim import Adam
14
import transformers
15
from trl import SFTTrainer
16
from tqdm import tqdm
17

18
from utils import run_benchmark, make_spider_plot
19

20
# Part 1
21

22
# TEXT: overview of LLM lab
23
# Load pretrained LLM (medium size model)
24

25
# model_name = "facebook/opt-1.3b"
26
model_name = "facebook/opt-125m"
27
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
28
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
29

30
# TEXT: explain tokenizer
31
# Include cell for tokenizer inspection
32

33
# TEXT: explain how LLMs are trained for next token prediction 
34
# Write a function to predict next token
35
def predict_next_token(probs, tokenizer):
36
    new_token = np.random.choice(len(probs), p=probs.numpy())
37
    print(tokenizer.decode(new_token), end='', flush=True)
38
    return new_token
39

40
# TEXT: explain that next token prediction must be called multiple times for inference
41
# Call in loop for autoregressive inference
42
def generate(start_text, model, tokenizer, num_steps=20, temp=1.): 
43
    print(start_text, end="")
44
    x = tokenizer.encode(start_text)
45
    num_start = len(x)
46

47
    for i in range(num_steps):
48
        input_tensor = torch.tensor(x).view(1, -1).to("cuda")
49
        logits = model(input_tensor).logits
50
        probs = F.softmax(logits/temp, dim=-1)[0, -1, :].cpu().detach()
51

52
        new_token = predict_next_token(probs, tokenizer)
53
        x.append(new_token)
54
    
55
    output = tokenizer.decode(x[num_start:])
56
    return output
57

58
def generate_pt(model, tokenizer, text, num_steps=50, until=None, temp=1.): 
59
    device = model.device
60
    print(text, end='', flush=True)
61
    x = tokenizer.encode(text)
62
    enc_until = tokenizer.encode(until)[1:]
63
    num_start = len(x)
64

65
    decoded = tokenizer.decode(x)
66

67
    for step in range(num_steps): 
68
        with torch.no_grad():
69
            input_tensor = torch.reshape(torch.LongTensor(x), [1, -1]).to(device)
70
            logits = model(input_tensor).logits
71
            probs = F.softmax(logits/temp, dim=-1)[0, -1, :]
72
        probs = probs.detach().cpu().numpy()
73

74
        new_token = np.random.choice(len(probs), p=probs)
75
        x.append(new_token)
76

77
        new_decoded = tokenizer.decode(x)
78
        new_part = new_decoded[len(decoded):]
79
        decoded = new_decoded
80

81
        print(new_part, end='', flush=True)
82
        text += new_part
83

84
        if len(x) >= len(until) and text[-len(until):] == until:
85
            break 
86
        
87
    
88
    output = tokenizer.decode(x[num_start:])
89
    print("\n", flush=True)
90
    return output
91

92
# Test autoregressive generation
93
# while True: 
94
#     print("\n\n\n\n\n")
95
#     input_text = input("Prompt: ")
96
#     output = generate(input_text, model, tokenizer)
97

98
# TEXT: some background on LLM benchmarking
99
# Load benchmark dataset and evaluate model
100
benchmark_dataset = pd.read_csv("benchmark.csv")
101
# category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)
102

103
# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes
104

105
# Benchmark smaller model
106
# model_name_350m = "facebook/opt-350m" 
107
# model_350m = transformers.AutoModelForCausalLM.from_pretrained(model_name_350m, device_map="auto")
108
# tokenizer_350m = transformers.AutoTokenizer.from_pretrained(model_name_350m)
109

110
# category_accs_350m, avg_acc_350m = run_benchmark(model_350m, tokenizer_350m, benchmark_dataset)
111

112
# Benchmark larger model
113
# model_name_2700m = "facebook/opt-2.7b" 
114
# model_2700m = transformers.AutoModelForCausalLM.from_pretrained(model_name_2700m, device_map="auto")
115
# tokenizer_2700m = transformers.AutoTokenizer.from_pretrained(model_name_2700m)
116

117
# category_accs_2700m, avg_acc_2700m = run_benchmark(model_2700m, tokenizer_2700m, benchmark_dataset)
118

119
# Spider plot
120

121
# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "2700M-Model": category_accs_2700m}
122
# benchmark_data = {"350M-Model": category_accs_1300m}
123
# make_spider_plot(benchmark_data)
124

125
def print_lora_params(module, layer_type):
126
    summ = 0
127
    for name, child in module.named_children():
128
        if isinstance(child, layer_type):
129
            num_params = sum(p.numel() for p in child.parameters() if p.requires_grad)
130

131
            print(name, num_params, child.in_features, child.out_features, (child.in_features * 8 + child.out_features * 8 == num_params))
132
            
133
            summ += num_params
134
        else:
135
            summ += print_lora_params(child, layer_type)
136
    
137
    return summ
138

139
# Part 2
140

141
# inspect current model
142
# print(model)
143

144
# summ = print_lora_params(model, nn.Linear)
145

146
# print("with function", summ)
147

148
# print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))
149

150
# # freeze all parameter gradients
151
for param in model.parameters():
152
    param.requires_grad = False
153

154
# new LoRA linear layer class
155
class LoRALinear(nn.Module):
156
    def __init__(
157
            self, 
158
            in_features: int,
159
            out_features: int,
160
            pretrained_weight: torch.Tensor,
161
            pretrained_bias: torch.Tensor,
162
            r: int = 8,
163
            lora_alpha: int = 8,
164
            lora_dropout: float = 0.1,
165
            **kwargs
166
    ):
167
        super(LoRALinear, self).__init__()
168

169
        self.r = r
170
        self.in_features = in_features
171
        self.out_features = out_features
172
        self.lora_alpha = lora_alpha
173

174
        self.weight = nn.Parameter(pretrained_weight)
175
        self.weight.requires_grad = False
176

177
        if pretrained_bias is not None:
178
            self.bias = nn.Parameter(pretrained_bias)
179
            self.bias.requires_grad = False
180
        else:
181
            self.bias = None
182

183
        # from https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
184
        self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features)))
185
        self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r)))
186
        self.scaling = self.lora_alpha / self.r
187
        self.lora_dropout = nn.Dropout(p=lora_dropout)
188
        
189
    def forward(self, x: torch.Tensor):
190
        result = F.linear(x, self.weight, bias=self.bias)
191
        result += (self.lora_dropout(x) @ self.lora_A.transpose(0, 1) @ self.lora_B.transpose(0, 1)) * self.scaling
192
        return result
193

194
# replace linear layers in model recursively
195
def replace_linear_with_lora(module):
196
    for name, child in module.named_children():
197
        if isinstance(child, nn.Linear):
198
            setattr(module, name, LoRALinear(child.in_features, child.out_features, child.weight, child.bias))
199
        else:
200
            replace_linear_with_lora(child)
201

202
replace_linear_with_lora(model)
203

204

205

206
# summ = print_lora_params(model, LoRALinear)
207

208
# print("with function", summ)
209

210
# print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))
211

212

213
# inspect new model
214
# print(model)
215

216
# load chat dataset
217
dataset_name = "timdettmers/openassistant-guanaco"
218
ft_dataset = load_dataset(dataset_name, split="train")
219

220
# train model (barebones loop)
221
context_length = 768
222
loss_fn = CrossEntropyLoss()
223

224
learning_rate = 1e-4
225
optimizer = Adam(model.parameters(), lr=learning_rate)
226
num_epochs = 5
227

228
model = model.to("cuda")
229

230
### Train the model 
231
# Define some training args
232
args = transformers.TrainingArguments("/home/dnori/introtodeeplearning/xtra_labs/llm_finetune/outputs", 
233
    per_device_train_batch_size=1, 
234
    logging_first_step=True,
235
    logging_steps=20,
236
    save_steps=100,
237
)
238

239
# Define a callback to check the progress on a sample question
240
class PrinterCallback(transformers.TrainerCallback):
241
    def on_log(self, args, state, control, model, logs=None, **kwargs):
242
        start_text = "### Human: When the weather is sunny, what color is the sky?### Assistant:"
243
        generate_pt(model, tokenizer, start_text, num_steps=200, until="###")
244

245
# Actually train the model
246
trainer = SFTTrainer(
247
    model,
248
    args=args,
249
    train_dataset=ft_dataset,
250
    dataset_text_field="text",
251
    max_seq_length=context_length,
252
    callbacks=[PrinterCallback()]
253
)
254
trainer.train()
255

256

257
# for epoch in range(num_epochs):
258
#     total_loss = 0
259
#     num_batches = 0
260

261
#     for batch in tqdm(ft_dataset):
262
#         prompt = batch["text"]
263
        
264
#         # encode with tokenizer
265
#         x = tokenizer.encode(prompt)
266
#         x_tensor = torch.tensor(x).view(1, -1).to("cuda")
267
#         max_len = min(context_length, x_tensor.shape[1]-1)
268
#         selected_len = random.randint(1,max_len)
269

270
#         input_tensor = x_tensor[:,:selected_len]
271
#         target_tensor = x_tensor[0,1:selected_len+1]
272

273
#          # zero gradients
274
#         optimizer.zero_grad()
275

276
#         # run through model
277
#         logits = model(input_tensor).logits[0]
278

279
#         # apply loss
280
#         loss = loss_fn(logits, target_tensor)
281

282
#         # backpropagation
283
#         loss.backward()
284

285
#         # optimizer step
286
#         optimizer.step()
287

288
#         total_loss += loss.item()
289
#         num_batches += 1
290

291
#     # Print average loss for the epoch
292
#     average_loss = total_loss / num_batches
293
#     print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss}")
294

295
# # evaluate finetuned model on benchmark
296
# category_accs_1300m_ft, avg_acc_1300m_ft = run_benchmark(model, tokenizer, benchmark_dataset)
297

298
# add to spider plot 
299
# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
300
# benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
301
# make_spider_plot(benchmark_data)
302
Product

Resources

Company