GitHub Repository: aamini/introtodeeplearning
Path: blob/master/xtra_labs/llm_finetune/draft.py
"""
Drafting lab flow in script format using PyTorch
"""
from datasets import load_dataset
import math
import numpy as np
import pandas as pd
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
import transformers
from trl import SFTTrainer
from tqdm import tqdm

from utils import run_benchmark, make_spider_plot

# Part 1

# TEXT: overview of LLM lab
# Load pretrained LLM (medium size model)

# model_name = "facebook/opt-1.3b"
model_name = "facebook/opt-125m"
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

# TEXT: explain tokenizer
# Include cell for tokenizer inspection

# TEXT: explain how LLMs are trained for next token prediction
# Write a function to predict next token
def predict_next_token(probs, tokenizer):
    # sample a token id from the model's probability distribution over the vocabulary
    new_token = np.random.choice(len(probs), p=probs.numpy())
    print(tokenizer.decode(new_token), end='', flush=True)
    return new_token

# TEXT: explain that next token prediction must be called multiple times for inference
# Call in loop for autoregressive inference
def generate(start_text, model, tokenizer, num_steps=20, temp=1.):
    print(start_text, end="")
    x = tokenizer.encode(start_text)
    num_start = len(x)

    for i in range(num_steps):
        # run the model on the full sequence so far and take the distribution for the last position
        input_tensor = torch.tensor(x).view(1, -1).to("cuda")
        logits = model(input_tensor).logits
        probs = F.softmax(logits / temp, dim=-1)[0, -1, :].cpu().detach()

        # sample the next token and append it to the sequence
        new_token = predict_next_token(probs, tokenizer)
        x.append(new_token)

    output = tokenizer.decode(x[num_start:])
    return output

def generate_pt(model, tokenizer, text, num_steps=50, until=None, temp=1.):
    device = model.device
    print(text, end='', flush=True)
    x = tokenizer.encode(text)
    num_start = len(x)

    decoded = tokenizer.decode(x)

    for step in range(num_steps):
        with torch.no_grad():
            input_tensor = torch.reshape(torch.LongTensor(x), [1, -1]).to(device)
            logits = model(input_tensor).logits
            probs = F.softmax(logits / temp, dim=-1)[0, -1, :]
            probs = probs.detach().cpu().numpy()

        # sample the next token and append it to the running sequence
        new_token = np.random.choice(len(probs), p=probs)
        x.append(new_token)

        # decode only the newly generated piece so it can be streamed to stdout
        new_decoded = tokenizer.decode(x)
        new_part = new_decoded[len(decoded):]
        decoded = new_decoded

        print(new_part, end='', flush=True)
        text += new_part

        # stop early once the generated text ends with the stop string
        if until is not None and len(text) >= len(until) and text[-len(until):] == until:
            break

    output = tokenizer.decode(x[num_start:])
    print("\n", flush=True)
    return output

# Test autoregressive generation
# while True:
#     print("\n\n\n\n\n")
#     input_text = input("Prompt: ")
#     output = generate(input_text, model, tokenizer)

# TEXT: some background on LLM benchmarking
# Load benchmark dataset and evaluate model
benchmark_dataset = pd.read_csv("benchmark.csv")
# category_accs_1300m, avg_acc_1300m = run_benchmark(model, tokenizer, benchmark_dataset)

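# Quick peek at the benchmark file (sketch): row count and first few rows only, without
# assuming anything about its column layout.
print("benchmark questions:", len(benchmark_dataset))
print(benchmark_dataset.head())
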
# TEXT: ask them to make a prediction on how accuracy will be affected by different model sizes

# Benchmark smaller model
# model_name_350m = "facebook/opt-350m"
# model_350m = transformers.AutoModelForCausalLM.from_pretrained(model_name_350m, device_map="auto")
# tokenizer_350m = transformers.AutoTokenizer.from_pretrained(model_name_350m)

# category_accs_350m, avg_acc_350m = run_benchmark(model_350m, tokenizer_350m, benchmark_dataset)

# Benchmark larger model
# model_name_2700m = "facebook/opt-2.7b"
# model_2700m = transformers.AutoModelForCausalLM.from_pretrained(model_name_2700m, device_map="auto")
# tokenizer_2700m = transformers.AutoTokenizer.from_pretrained(model_name_2700m)

# category_accs_2700m, avg_acc_2700m = run_benchmark(model_2700m, tokenizer_2700m, benchmark_dataset)

# Spider plot

# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "2700M-Model": category_accs_2700m}
# benchmark_data = {"350M-Model": category_accs_1300m}
# make_spider_plot(benchmark_data)

def print_lora_params(module, layer_type):
    summ = 0
    for name, child in module.named_children():
        if isinstance(child, layer_type):
            num_params = sum(p.numel() for p in child.parameters() if p.requires_grad)

            # last field checks that the trainable params match a rank-8 LoRA update: r*(in_features + out_features)
            print(name, num_params, child.in_features, child.out_features, (child.in_features * 8 + child.out_features * 8 == num_params))

            summ += num_params
        else:
            summ += print_lora_params(child, layer_type)

    return summ

# Part 2

# inspect current model
# print(model)

# summ = print_lora_params(model, nn.Linear)

# print("with function", summ)

# print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))

# freeze all parameter gradients
for param in model.parameters():
    param.requires_grad = False

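# Sanity check (sketch): after freezing, no parameters should require gradients.
print("trainable params after freezing:",
      sum(p.numel() for p in model.parameters() if p.requires_grad))
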
# new LoRA linear layer class
class LoRALinear(nn.Module):
    def __init__(
        self,
        in_features: int,
        out_features: int,
        pretrained_weight: torch.Tensor,
        pretrained_bias: torch.Tensor,
        r: int = 8,
        lora_alpha: int = 8,
        lora_dropout: float = 0.1,
        **kwargs
    ):
        super(LoRALinear, self).__init__()

        self.r = r
        self.in_features = in_features
        self.out_features = out_features
        self.lora_alpha = lora_alpha

        # frozen pretrained weight (and bias, if present)
        self.weight = nn.Parameter(pretrained_weight)
        self.weight.requires_grad = False

        if pretrained_bias is not None:
            self.bias = nn.Parameter(pretrained_bias)
            self.bias.requires_grad = False
        else:
            self.bias = None

        # trainable low-rank matrices A and B, from https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
        self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features)))
        self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r)))
        self.scaling = self.lora_alpha / self.r
        self.lora_dropout = nn.Dropout(p=lora_dropout)

    def forward(self, x: torch.Tensor):
        # frozen pretrained projection plus the scaled low-rank update
        result = F.linear(x, self.weight, bias=self.bias)
        result += (self.lora_dropout(x) @ self.lora_A.transpose(0, 1) @ self.lora_B.transpose(0, 1)) * self.scaling
        return result

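# Optional sanity check (sketch, not part of the original lab flow): because lora_B starts at
# zero, a LoRALinear should reproduce the nn.Linear it wraps exactly at initialization.
# The names _lin, _lora, _x are throwaway illustration names.
_lin = nn.Linear(16, 8)
_lora = LoRALinear(16, 8, _lin.weight, _lin.bias)
_lora.eval()  # disable dropout so the comparison is deterministic
_x = torch.randn(2, 16)
print("LoRA matches frozen linear at init:", torch.allclose(_lin(_x), _lora(_x)))
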
# replace linear layers in model recursively
def replace_linear_with_lora(module):
    for name, child in module.named_children():
        if isinstance(child, nn.Linear):
            setattr(module, name, LoRALinear(child.in_features, child.out_features, child.weight, child.bias))
        else:
            replace_linear_with_lora(child)

replace_linear_with_lora(model)


# summ = print_lora_params(model, LoRALinear)

# print("with function", summ)

# print("without function", sum(p.numel() for p in model.parameters() if p.requires_grad))


# inspect new model
# print(model)

# load chat dataset
dataset_name = "timdettmers/openassistant-guanaco"
ft_dataset = load_dataset(dataset_name, split="train")

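# Peek at one training example (sketch) to see the "### Human: ... ### Assistant: ..." chat
# format stored in the dataset's "text" field.
print(ft_dataset[0]["text"][:500])
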
# train model (barebones loop)
context_length = 768
loss_fn = CrossEntropyLoss()

learning_rate = 1e-4
optimizer = Adam(model.parameters(), lr=learning_rate)
num_epochs = 5

model = model.to("cuda")

### Train the model
# Define some training args
args = transformers.TrainingArguments("/home/dnori/introtodeeplearning/xtra_labs/llm_finetune/outputs",
    per_device_train_batch_size=1,
    logging_first_step=True,
    logging_steps=20,
    save_steps=100,
)

# Define a callback to check the progress on a sample question
class PrinterCallback(transformers.TrainerCallback):
    def on_log(self, args, state, control, model, logs=None, **kwargs):
        start_text = "### Human: When the weather is sunny, what color is the sky?### Assistant:"
        generate_pt(model, tokenizer, start_text, num_steps=200, until="###")

# Actually train the model
trainer = SFTTrainer(
    model,
    args=args,
    train_dataset=ft_dataset,
    dataset_text_field="text",
    max_seq_length=context_length,
    callbacks=[PrinterCallback()]
)
trainer.train()


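# Try the finetuned model on a fresh prompt (sketch). The prompt below is illustrative; the
# "### Human: ...### Assistant:" wrapping mirrors the dataset format used above.
sample_prompt = "### Human: What is the capital of France?### Assistant:"
generate_pt(model, tokenizer, sample_prompt, num_steps=200, until="###")
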
# for epoch in range(num_epochs):
#     total_loss = 0
#     num_batches = 0

#     for batch in tqdm(ft_dataset):
#         prompt = batch["text"]

#         # encode with tokenizer
#         x = tokenizer.encode(prompt)
#         x_tensor = torch.tensor(x).view(1, -1).to("cuda")
#         max_len = min(context_length, x_tensor.shape[1]-1)
#         selected_len = random.randint(1, max_len)

#         input_tensor = x_tensor[:, :selected_len]
#         target_tensor = x_tensor[0, 1:selected_len+1]

#         # zero gradients
#         optimizer.zero_grad()

#         # run through model
#         logits = model(input_tensor).logits[0]

#         # apply loss
#         loss = loss_fn(logits, target_tensor)

#         # backpropagation
#         loss.backward()

#         # optimizer step
#         optimizer.step()

#         total_loss += loss.item()
#         num_batches += 1

#     # Print average loss for the epoch
#     average_loss = total_loss / num_batches
#     print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss}")

# evaluate finetuned model on benchmark
# category_accs_1300m_ft, avg_acc_1300m_ft = run_benchmark(model, tokenizer, benchmark_dataset)

# add to spider plot
# benchmark_data = {"350M-Model": category_accs_350m, "1300M-Model": category_accs_1300m, "1300M-Model-Finetuned": category_accs_1300m_ft, "2700M-Model": category_accs_2700m}
# benchmark_data = {"350M-Model": category_accs_1300m, "350M-Model-Finetuned": category_accs_1300m_ft}
# make_spider_plot(benchmark_data)