KoboldAI
GitHub Repository: KoboldAI/KoboldAI-Client
Path: blob/main/prompt_tuner.py
1
import abc
2
import os
3
import sys
4
import math
5
import numpy as np
6
import termcolor
7
import contextlib
8
import traceback
9
import random
10
import zipfile
11
import json
12
import uuid
13
import datetime
14
import base64
15
import pickle
16
import hashlib
17
import itertools
18
import functools
19
import bisect
20
import eventlet
21
import packaging.version
22
import gc
23
import time
24
from tqdm.auto import tqdm
25
import torch
26
import torch.nn.functional as F
27
from torch.nn import Embedding, CrossEntropyLoss
28
import transformers
29
from transformers import __version__ as transformers_version
30
from transformers import AutoTokenizer, GPT2Tokenizer, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils
31
import accelerate
32
import accelerate.utils
33
from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM
34
from mkultra.soft_prompt import SoftPrompt
35
from typing import Dict, List, Optional, TextIO, Union
36
37
import logging
38
logging.getLogger("urllib3").setLevel(logging.ERROR)
39
40
import breakmodel
41
import torch_lazy_loader
42
import utils
43
44
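# Module-level flag: when True, move_model_to_devices() splits the model's layers
# across GPU(s), CPU RAM and disk via breakmodel/accelerate instead of placing the
# whole model on a single device.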
use_breakmodel = True
45
46
47
class colors:
48
PURPLE = '\033[95m'
49
BLUE = '\033[94m'
50
CYAN = '\033[96m'
51
GREEN = '\033[92m'
52
YELLOW = '\033[93m'
53
RED = '\033[91m'
54
END = '\033[0m'
55
UNDERLINE = '\033[4m'
56
57
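# File-like object used as a tqdm output target: echoes progress text to the console
# and, when a socket.io connection is available, forwards it to the KoboldAI web UI
# as a 'model_load_status' message.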
class Send_to_socketio(object):
58
def write(self, bar):
59
print(bar, end="")
60
time.sleep(0.01)
61
try:
62
if utils.emit is not None:
63
utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", "&nbsp;")}, broadcast=True)
64
except:
65
pass
66
67
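# Replaces transformers.utils.hub.http_get with a variant whose tqdm progress bar
# writes to the web UI (via the nested Send_to_socketio class) as well as stdout;
# config.json downloads get no progress bar.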
def patch_transformers_download():
68
global transformers
69
import copy, requests, tqdm, time
70
class Send_to_socketio(object):
71
def write(self, bar):
72
bar = bar.replace("\r", "").replace("\n", "")
73
if bar != "":
74
try:
75
print(bar, end="\r")
76
if utils.emit is not None:
77
utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", "&nbsp;")}, broadcast=True)
78
eventlet.sleep(seconds=0)
79
except:
80
pass
81
def http_get(
82
url: str,
83
temp_file: transformers.utils.hub.BinaryIO,
84
proxies=None,
85
resume_size=0,
86
headers: transformers.utils.hub.Optional[transformers.utils.hub.Dict[str, str]] = None,
87
file_name: transformers.utils.hub.Optional[str] = None,
88
):
89
"""
90
Download remote file. Do not gobble up errors.
91
"""
92
headers = copy.deepcopy(headers)
93
if resume_size > 0:
94
headers["Range"] = f"bytes={resume_size}-"
95
r = requests.get(url, stream=True, proxies=proxies, headers=headers)
96
transformers.utils.hub._raise_for_status(r)
97
content_length = r.headers.get("Content-Length")
98
total = resume_size + int(content_length) if content_length is not None else None
99
# `tqdm` behavior is determined by `utils.logging.is_progress_bar_enabled()`
100
# and can be set using `utils.logging.enable/disable_progress_bar()`
101
if url[-11:] != 'config.json':
102
progress = tqdm.tqdm(
103
unit="B",
104
unit_scale=True,
105
unit_divisor=1024,
106
total=total,
107
initial=resume_size,
108
desc=f"Downloading {file_name}" if file_name is not None else "Downloading",
109
file=Send_to_socketio(),
110
)
111
for chunk in r.iter_content(chunk_size=1024):
112
if chunk: # filter out keep-alive new chunks
113
if url[-11:] != 'config.json':
114
progress.update(len(chunk))
115
temp_file.write(chunk)
116
if url[-11:] != 'config.json':
117
progress.close()
118
119
transformers.utils.hub.http_get = http_get
120
121
122
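# Monkey-patches transformers: routes model downloads through aria2 when available,
# records shard counts for the loading progress bar, and works around known bugs in
# specific transformers versions (XGLM positional embeddings in 4.17.0.dev0,
# OPTForCausalLM lm_head size in 4.19.x).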
def patch_transformers():
123
global transformers
124
125
patch_transformers_download()
126
127
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
128
@classmethod
129
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
130
utils.num_shards = None
131
utils.current_shard = 0
132
utils.from_pretrained_model_name = pretrained_model_name_or_path
133
utils.from_pretrained_index_filename = None
134
utils.from_pretrained_kwargs = kwargs
135
utils.bar = None
136
if utils.args is None or not utils.args.no_aria2:
137
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
138
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
139
if(not hasattr(PreTrainedModel, "_kai_patched")):
140
PreTrainedModel.from_pretrained = new_from_pretrained
141
PreTrainedModel._kai_patched = True
142
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
143
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
144
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
145
utils.num_shards = utils.get_num_shards(index_filename)
146
utils.from_pretrained_index_filename = index_filename
147
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
148
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
149
150
# Some versions of transformers 4.17.0.dev0 are affected by
151
# https://github.com/huggingface/transformers/issues/15736
152
# This is a workaround for those versions of transformers.
153
if(transformers_version == "4.17.0.dev0"):
154
try:
155
from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding
156
except ImportError:
157
pass
158
else:
159
@torch.no_grad()
160
def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0):
161
bsz, seq_len = inputs_embeds.size()[:-1]
162
input_shape = inputs_embeds.size()[:-1]
163
sequence_length = input_shape[1]
164
position_ids = torch.arange(
165
past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
166
).unsqueeze(0).expand(input_shape).contiguous()
167
max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
168
if max_pos > self.weights.size(0):
169
self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx)
170
return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach()
171
XGLMSinusoidalPositionalEmbedding.forward = new_forward
172
173
174
# Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
175
if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
176
try:
177
from transformers import OPTForCausalLM, OPTModel
178
except ImportError:
179
pass
180
else:
181
# This is the same as the original __init__ but with
182
# config.hidden_size
183
# replaced with
184
# config.word_embed_proj_dim
185
def new_init(self, config):
186
super(OPTForCausalLM, self).__init__(config)
187
self.model = OPTModel(config)
188
self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
189
self.post_init()
190
OPTForCausalLM.__init__ = new_init
191
192
193
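# Prints a table showing how many transformer layers are assigned to each GPU, to the
# disk cache and to the CPU, marking the currently selected row.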
def device_list(n_layers, primary=None, selected=None):
194
device_count = torch.cuda.device_count()
195
if(device_count < 2):
196
primary = None
197
gpu_blocks = breakmodel.gpu_blocks + (device_count - len(breakmodel.gpu_blocks))*[0]
198
print(f"{colors.YELLOW} DEVICE ID | LAYERS | DEVICE NAME{colors.END}")
199
for i in range(device_count):
200
name = torch.cuda.get_device_name(i)
201
if(len(name) > 47):
202
name = "..." + name[-44:]
203
row_color = colors.END
204
sep_color = colors.YELLOW
205
print(f"{row_color}{colors.YELLOW + '->' + row_color if i == selected else ' '} {'(primary)' if i == primary else ' '*9} {i:3} {sep_color}|{row_color} {gpu_blocks[i]:3} {sep_color}|{row_color} {name}{colors.END}")
206
row_color = colors.END
207
sep_color = colors.YELLOW
208
print(f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}")
209
print(f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}")
210
211
212
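# Places the model according to the breakmodel settings: either entirely on one device
# (GPU in half precision, or CPU in float32), or layer by layer across GPUs, CPU RAM
# and the accelerate disk cache via breakmodel.dispatch_model_ex. Also points the
# module-level `generator` at model.generate.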
def move_model_to_devices(model, usegpu, gpu_device):
213
global generator
214
215
if(not use_breakmodel):
216
if(usegpu):
217
model = model.half().to(gpu_device)
218
else:
219
model = model.to('cpu').float()
220
generator = model.generate
221
return
222
223
for key, value in model.state_dict().items():
224
target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16
225
if(value.dtype is not target_dtype):
226
accelerate.utils.set_module_tensor_to_device(model, key, target_dtype)
227
disk_blocks = breakmodel.disk_blocks
228
gpu_blocks = breakmodel.gpu_blocks
229
ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks)
230
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
231
device_map = {}
232
for name in utils.layers_module_names:
233
layer = int(name.rsplit(".", 1)[1])
234
device = ("disk" if layer < disk_blocks else "cpu") if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
235
device_map[name] = device
236
for name in utils.get_missing_module_names(model, list(device_map.keys())):
237
device_map[name] = breakmodel.primary_device
238
breakmodel.dispatch_model_ex(model, device_map, main_device=breakmodel.primary_device, offload_buffers=True, offload_dir="accelerate-disk-cache")
239
gc.collect()
240
generator = model.generate
241
return
242
243
244
_PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel]
245
246
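# Shims that expose a model's input-embedding layer under the attribute name
# `transformer.wte`, which is what the mkultra soft-prompt code expects: _WTEDummy
# wraps models that have no `transformer` attribute at all, while _WTEMixin is mixed
# into an existing `transformer` submodule that lacks `wte`.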
class _WTEDummy:
247
def __init__(self, model: transformers.PreTrainedModel):
248
self.model = model
249
250
@property
251
def wte(self: "_WTEDummy"):
252
return self.model.get_input_embeddings()
253
254
@wte.setter
255
def wte(self: "_WTEDummy", v):
256
self.model.set_input_embeddings(v)
257
258
class _WTEMixin:
259
@property
260
def wte(self: Union["_WTEMixin", transformers.PreTrainedModel]):
261
return self.get_input_embeddings()
262
263
@wte.setter
264
def wte(self: Union["_WTEMixin", transformers.PreTrainedModel], v):
265
self.set_input_embeddings(v)
266
267
268
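# Generalizes mkultra's GPT-specific prompt tuning to arbitrary causal LMs:
# from_pretrained() attaches the wte shim, freezes all model parameters and
# initializes the soft prompt; forward() left-pads input_ids with dummy token ids
# whose embeddings are overwritten with the learned soft prompt through a temporary
# hook on Embedding.__call__. The remaining helpers are copied from
# GPTPromptTuningMixin by the loop after the class body.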
class UniversalPromptTuningMixin:
269
@classmethod
270
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
271
model: _PromptTuningPreTrainedModel = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
272
273
if not hasattr(model, "transformer"):
274
model.transformer = _WTEDummy(model)
275
elif not hasattr(model.transformer, "wte"):
276
assert not isinstance(model.transformer, type)
277
model.transformer.__class__ = type("_UniversalPromptTuning" + model.transformer.__class__.__name__, (_WTEMixin, model.transformer.__class__), {})
278
279
model.__class__ = type("_UniversalPromptTuning" + model.__class__.__name__, (UniversalPromptTuningMixin, model.__class__), {})
280
281
for param in model.parameters():
282
param.requires_grad = False
283
model.initialize_soft_prompt()
284
285
return model
286
287
def forward(
288
self: _PromptTuningPreTrainedModel,
289
input_ids: Optional[torch.Tensor] = None,
290
attention_mask: Optional[torch.Tensor] = None,
291
labels: Optional[torch.Tensor] = None,
292
use_cache: Optional[bool] = None,
293
return_dict: Optional[bool] = None,
294
**kwargs,
295
):
296
assert input_ids is not None
297
assert input_ids.ndim == 2
298
299
input_ids = F.pad(input_ids, (self.learned_embedding.size(0), 0, 0, 0), value=self.transformer.wte.weight.size(0) // 2)
300
301
if labels is not None:
302
labels = self._extend_labels(labels)
303
304
if attention_mask is not None:
305
attention_mask = self._extend_attention_mask(attention_mask)
306
307
old_embedding_call = Embedding.__call__
308
model = self
309
310
def new_embedding_call(self, input_ids, *args, **kwargs):
311
inputs_embeds = old_embedding_call(self, input_ids, *args, **kwargs)
312
if model.transformer.wte is self:
313
assert inputs_embeds.ndim == 3
314
inputs_embeds[:, :model.learned_embedding.size(0), :] = model.learned_embedding[None]
315
return inputs_embeds
316
317
Embedding.__call__ = new_embedding_call
318
319
try:
320
return super().forward(
321
input_ids=input_ids,
322
attention_mask=attention_mask,
323
labels=labels,
324
use_cache=use_cache,
325
return_dict=return_dict,
326
)
327
finally:
328
Embedding.__call__ = old_embedding_call
329
330
for k in dir(GPTPromptTuningMixin):
331
v = getattr(GPTPromptTuningMixin, k)
332
_v = getattr(UniversalPromptTuningMixin, k, None)
333
if _v is None or (_v is getattr(object, k, None) and callable(_v) and not isinstance(_v, type)):
334
setattr(UniversalPromptTuningMixin, k, v)
335
336
337
class AutoPromptTuningLM(UniversalPromptTuningMixin, transformers.AutoModelForCausalLM):
338
def __init__(self, config):
339
super().__init__(config)
340
341
342
default_quiet = False
343
344
345
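# Loads a tokenizer for the given model, trying progressively more permissive options
# (slow AutoTokenizer -> fast AutoTokenizer -> GPT2Tokenizer -> stock "gpt2") and
# checking a local models/ folder when present. Also attaches a _kai_no_prefix()
# context manager that temporarily disables BOS tokens and prefix spaces so text can
# be tokenized verbatim.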
def get_tokenizer(model_id, revision=None) -> transformers.PreTrainedTokenizerBase:
346
if(os.path.isdir(model_id)):
347
try:
348
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False)
349
except Exception as e:
350
try:
351
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
352
except Exception as e:
353
try:
354
tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
355
except Exception as e:
356
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
357
elif(os.path.isdir("models/{}".format(model_id.replace('/', '_')))):
358
try:
359
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False)
360
except Exception as e:
361
try:
362
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache")
363
except Exception as e:
364
try:
365
tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache")
366
except Exception as e:
367
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
368
else:
369
try:
370
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False)
371
except Exception as e:
372
try:
373
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
374
except Exception as e:
375
try:
376
tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
377
except Exception as e:
378
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
379
380
@contextlib.contextmanager
381
def _kai_no_prefix():
382
add_bos_token = getattr(tokenizer, "add_bos_token", False)
383
add_prefix_space = getattr(tokenizer, "add_prefix_space", False)
384
tokenizer.add_bos_token = False
385
tokenizer.add_prefix_space = False
386
try:
387
yield
388
finally:
389
tokenizer.add_bos_token = add_bos_token
390
tokenizer.add_prefix_space = add_prefix_space
391
392
tokenizer._kai_no_prefix = _kai_no_prefix
393
return tokenizer
394
395
396
class ConfigurationError(Exception):
397
def __init__(self, msg: str = "Unknown error", code: int = 1, quiet: Optional[bool] = None):
398
if quiet is None:
399
quiet = default_quiet
400
super().__init__(msg)
401
self.code = code
402
self.quiet = quiet
403
404
405
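# Abstract base class for soft-prompt ("prompt tuning") trainers. Subclasses supply
# the dataset plumbing (get_batch, get_num_sequences, tokenize_dataset_callback) and
# the initial soft embeddings; TrainerBase provides dataset tokenization, checkpoint
# export and the training loop.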
class TrainerBase(abc.ABC):
406
@abc.abstractmethod
407
def startup(self, step: int) -> None:
408
...
409
410
@abc.abstractmethod
411
def get_batch(self, step: int, size: int) -> np.ndarray:
412
...
413
414
@abc.abstractmethod
415
def get_num_sequences(self) -> int:
416
...
417
418
@abc.abstractmethod
419
def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt:
420
...
421
422
@abc.abstractmethod
423
def tokenize_dataset_callback(self, tokenizer: transformers.PreTrainedTokenizerBase, text: str) -> List[int]:
424
...
425
426
class TrainerData:
427
def __init__(self):
428
self.__lazy_load_spec: Optional[dict] = None
429
self.model_spec: Optional[dict] = None
430
self.tokenizer_id: Optional[str] = None
431
self.newlinemode: Optional[str] = None
432
self.ckpt_path: Optional[str] = None
433
self.save_file: Optional[str] = None
434
self.params: Optional[dict] = None
435
self.stparams: Optional[dict] = None
436
self.gradient_accumulation_steps = -1
437
self.soft_in_dim = -1
438
self.prompt_method = "tokens"
439
self.prompt_seed = 42
440
441
@property
442
def lazy_load_spec(self):
443
print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr)
444
return self.__lazy_load_spec
445
446
@lazy_load_spec.setter
447
def lazy_load_spec(self, value: Optional[dict]):
448
print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr)
449
self.__lazy_load_spec = value
450
451
@property
452
def kaiming_size(self): # backwards compatibility
453
return self.soft_in_dim
454
455
@kaiming_size.setter
456
def kaiming_size(self, value: int): # backwards compatibility
457
self.prompt_method = "kaiming"
458
self.soft_in_dim = value
459
460
data: TrainerData
461
462
def __init__(self, universe: Optional[int] = None, quiet=False):
463
self.quiet = quiet
464
self.universe = universe
465
self.data = self.TrainerData()
466
self._spmodule: Optional[str] = None
467
if universe is not None:
468
print("WARNING: The `universe` argument of `TrainerBase.__init__` is currently unused", file=sys.stderr)
469
470
def raise_configuration_error(self, msg, **kwargs):
471
if "quiet" not in kwargs:
472
kwargs["quiet"] = self.quiet
473
raise ConfigurationError(msg, **kwargs)
474
475
def _get_model_config(self) -> transformers.configuration_utils.PretrainedConfig:
476
REVISION = None
477
if(os.path.isdir(self.data.ckpt_path)):
478
model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
479
elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))):
480
model_config = AutoConfig.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
481
else:
482
model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
483
return model_config
484
485
def get_hf_checkpoint_metadata(self) -> bool:
486
params = {}
487
model_config = self._get_model_config()
488
params["tokenizer_id"] = self.data.ckpt_path
489
tokenizer = get_tokenizer(self.data.ckpt_path)
490
params["newlinemode"] = params.get(
491
"newlinemode", "s" if model_config.model_type == "xglm" else "n"
492
)
493
params["max_batch_size"] = 2048
494
with tokenizer._kai_no_prefix():
495
params["eos_token"] = (
496
[50259, 50259] if model_config.model_type == "xglm" and model_config.eos_token_id == 50259 else [model_config.eos_token_id]
497
)
498
params["seq"] = 2048
499
self.data.params = params
500
return True
501
502
def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase:
503
return get_tokenizer(self.data.ckpt_path)
504
505
def save_data(self):
506
pass
507
508
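# Converts the training checkpoint (MKUSP file) into a KoboldAI soft prompt: a zip
# archive containing the prompt tensor as tensor.npy plus a meta.json with
# name/author/supported/description fields.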
def export_to_kobold(self, output_file: str, name: str, author: str, supported: str, description: str):
509
try:
510
z = torch.load(self.data.save_file)
511
assert z["step"] > 0
512
assert z["tensor"].ndim == 2 and "opt_state" in z
513
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
514
self.data.soft_in_dim = z["tensor"].shape[0]
515
except AssertionError:
516
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
517
518
tensor = z["tensor"]
519
520
meta = {
521
"name": name,
522
"author": author,
523
"supported": supported,
524
"description": description,
525
}
526
if len(meta["author"].strip()) == 0:
527
meta.pop("author")
528
meta["supported"] = list(map(lambda m: m.strip(), supported.split(",")))
529
530
with zipfile.ZipFile(output_file, "w", compression=zipfile.ZIP_LZMA) as z:
531
with z.open("tensor.npy", "w") as f:
532
np.save(f, tensor.detach().cpu().numpy(), allow_pickle=False)
533
with zipfile.ZipFile(output_file, "a", compression=zipfile.ZIP_STORED) as z:
534
with z.open("meta.json", "w") as f:
535
f.write(json.dumps(meta, indent=2).encode("utf-8"))
536
537
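# Converts the training checkpoint into mkultra's JSON soft prompt format: metadata
# plus the prompt tensor as a base64-encoded pickle.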
def export_to_mkultra(self, output_file: str, soft_prompt_name: str, soft_prompt_description: str):
538
try:
539
z = torch.load(self.data.save_file)
540
assert z["step"] > 0
541
assert z["tensor"].ndim == 2 and "opt_state" in z
542
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
543
self.data.soft_in_dim = z["tensor"].shape[0]
544
_step = z["step"]
545
except AssertionError:
546
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
547
548
tensor = z["tensor"]
549
550
with open(output_file, "w") as f:
551
json.dump(
552
{
553
"metadata": {
554
"step": _step,
555
"loss": float(z["loss"]),
556
"uuid": str(uuid.uuid4()),
557
"name": soft_prompt_name,
558
"description": soft_prompt_description,
559
"epoch": datetime.datetime.now().timestamp(),
560
},
561
"tensor": base64.b64encode(
562
pickle.dumps(
563
tensor.detach().cpu(),
564
protocol=4,
565
),
566
).decode("ascii"),
567
},
568
f,
569
)
570
571
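# Tokenizes a text file (or directory of files) into fixed-length sequences of
# batch_size + 1 tokens, optionally cleaning the text with ftfy, shuffling and
# repeating it for the requested number of epochs, and saves the result as a uint16
# NumPy array for use as the training dataset.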
def tokenize_dataset(
572
self,
573
dataset_path: Union[str, TextIO],
574
output_file: Union[str, TextIO],
575
batch_size=2048,
576
epochs=1,
577
use_ftfy=True,
578
shuffle_seed: Optional[Union[int, float, str, bytes, bytearray]] = 1729,
579
):
580
dataset_path = dataset_path.replace("\\", "/")
581
output_file = output_file.replace("\\", "/")
582
if not isinstance(batch_size, int) or batch_size < 1:
583
self.raise_configuration_error(
584
"batch_size must be an integer greater than zero.", code=9
585
)
586
if (
587
not isinstance(epochs, int) and not isinstance(epochs, float)
588
) or epochs <= 0:
589
self.raise_configuration_error(
590
"epochs must be an int or float greater than zero.", code=10
591
)
592
if isinstance(output_file, str) and output_file.endswith("/"):
593
self.raise_configuration_error(
594
"output_file should be the path to a file, not a directory.", code=11
595
)
596
if isinstance(dataset_path, str) and not os.path.exists(dataset_path):
597
self.raise_configuration_error(
598
"dataset_path is not set to a valid file or directory.", code=12
599
)
600
601
if use_ftfy:
602
import ftfy
603
604
tokenizer = self.get_tokenizer()
605
606
batch_size = min(
607
batch_size,
608
self.data.params["max_batch_size"] - self.data.soft_in_dim,
609
)
610
assert batch_size >= 0
611
print(
612
termcolor.colored(
613
"\nIf you see a warning somewhere below about token indices, ignore it. That warning is normal.\n",
614
"magenta",
615
)
616
)
617
print("Batch size:", batch_size)
618
print(termcolor.colored("Tokenizing your dataset...\n", "magenta"))
619
620
if not isinstance(dataset_path, str):
621
files = [dataset_path]
622
elif os.path.isfile(dataset_path):
623
files = [dataset_path]
624
else:
625
files = sorted(
626
os.path.join(dataset_path, filename)
627
for filename in os.listdir(dataset_path)
628
)
629
if shuffle_seed is not None:
630
random.Random(shuffle_seed).shuffle(files)
631
tokens = []
632
eos = tokenizer.decode(self.data.params["eos_token"])
633
for path in files:
634
if isinstance(path, str):
635
f = open(path)
636
else:
637
f = path
638
try:
639
text = f.read()
640
if use_ftfy:
641
text = ftfy.fix_text(text)
642
text = text.replace("<|endoftext|>", eos)
643
tokens.extend(self.tokenize_dataset_callback(tokenizer, text))
644
finally:
645
if isinstance(path, str):
646
f.close()
647
648
print("Dataset size (in tokens):", len(tokens))
649
if len(tokens) < batch_size + 1:
650
self.raise_configuration_error(
651
"Your dataset is too small! The number of tokens has to be greater than the batch size. Try increasing the epochs.",
652
code=13,
653
)
654
tail = len(tokens) % (batch_size + 1)
655
if tail:
656
print(
657
f"We're removing the last {tail} tokens from your dataset to make the length a multiple of {batch_size+1}."
658
)
659
tokens = tokens[:-tail]
660
661
tokens = np.array(tokens, dtype=np.uint16).reshape((-1, batch_size + 1))
662
sequences_per_epoch = tokens.shape[0]
663
_epochs = math.ceil(epochs)
664
if _epochs > 1:
665
rng = np.random.Generator(np.random.PCG64(1729))
666
tokens = np.concatenate(
667
(
668
tokens,
669
*(rng.permutation(tokens, axis=0) for i in range(_epochs - 1)),
670
),
671
axis=0,
672
)
673
tokens = tokens[: math.ceil(epochs * sequences_per_epoch)]
674
print(f"Total sequences in your dataset: {tokens.shape[0]}")
675
676
if isinstance(output_file, str):
677
f = open(output_file, "wb")
678
else:
679
f = output_file
680
try:
681
np.save(f, tokens)
682
finally:
683
if isinstance(output_file, str):
684
f.close()
685
686
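# Main training entry point: loads (or resumes) the soft prompt and the model,
# optionally splitting it across devices with breakmodel, then optimizes only the
# soft-prompt embedding with Adafactor and a cosine-with-hard-restarts schedule,
# accumulating gradients over data.gradient_accumulation_steps sequences per step and
# checkpointing to data.save_file every stparams["save_every"] steps.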
def train(
687
self,
688
breakmodel_primary_device: Optional[Union[str, int, torch.device]] = None,
689
breakmodel_gpulayers: Optional[List[int]] = None,
690
breakmodel_disklayers = 0,
691
):
692
if breakmodel_gpulayers is None:
693
breakmodel_gpulayers = []
694
if breakmodel_primary_device is None:
695
breakmodel_primary_device = 0 if sum(x if x >= 0 else 1 for x in breakmodel_gpulayers) else "cpu"
696
697
if self.data.params is not None and "max_batch_size" not in self.data.params:
698
self.data.params["max_batch_size"] = 2048
699
700
if not os.path.exists(self.data.save_file):
701
print("We are starting a brand new soft-tuning session.\n")
702
self.startup(step=-1)
703
if self.data.soft_in_dim <= 0:
704
self.raise_configuration_error(
705
"You have not set a soft prompt size.", code=6
706
)
707
step = 0
708
else:
709
# If we're resuming a soft-tuning session, the soft prompt tensor is
710
# already in the save file and we just have to decode it.
711
try:
712
z = torch.load(self.data.save_file)
713
assert z["step"] > 0
714
assert z["tensor"].ndim == 2 and "opt_state" in z
715
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
716
self.data.soft_in_dim = z["tensor"].shape[0]
717
step = z["step"]
718
opt_state = z["opt_state"]
719
except AssertionError:
720
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
721
print(f"We're resuming a previous soft-tuning session at step {step+1}.\n")
722
self.startup(step=step + 1)
723
soft_embeddings = z["tensor"]
724
725
REVISION = None
726
727
patch_transformers()
728
729
model: _PromptTuningPreTrainedModel
730
731
model_config = self._get_model_config()
732
n_layers = utils.num_layers(model_config)
733
breakmodel_gpulayers = [x if x >= 0 else n_layers for x in breakmodel_gpulayers]
734
735
convert_to_float16 = True
736
hascuda = torch.cuda.is_available()
737
usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
738
gpu_device = breakmodel_primary_device
739
use_breakmodel = bool(hascuda or breakmodel_disklayers or sum(breakmodel_gpulayers))
740
741
assert len(breakmodel_gpulayers) <= torch.cuda.device_count()
742
assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers
743
744
breakmodel.gpu_blocks = breakmodel_gpulayers
745
breakmodel.disk_blocks = breakmodel_disklayers
746
disk_blocks = breakmodel.disk_blocks
747
gpu_blocks = breakmodel.gpu_blocks
748
ram_blocks = n_layers - sum(gpu_blocks)
749
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
750
751
device_list(ram_blocks, primary=breakmodel.primary_device)
752
753
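# Callback for torch_lazy_loader: as checkpoint shards are read, each tensor is
# materialized directly onto its target device ("shared" pinned CPU RAM, the
# accelerate disk cache, or a GPU chosen from the breakmodel layer counts), converting
# dtypes as needed and updating the loading progress bar.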
def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_):
754
if lazy_load_callback.nested:
755
return
756
lazy_load_callback.nested = True
757
758
device_map: Dict[str, Union[str, int]] = {}
759
760
@functools.lru_cache(maxsize=None)
761
def get_original_key(key):
762
return max((original_key for original_key in utils.module_names if original_key.endswith(key)), key=len)
763
764
for key, value in model_dict.items():
765
original_key = get_original_key(key)
766
if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names):
767
device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not use_breakmodel else breakmodel.primary_device
768
else:
769
layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1])
770
device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not use_breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
771
device_map[key] = device
772
773
if utils.num_shards is None or utils.current_shard == 0:
774
utils.offload_index = {}
775
if os.path.isdir("accelerate-disk-cache"):
776
# Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
777
# (the folder doesn't contain any subfolders so os.remove will do just fine)
778
for filename in os.listdir("accelerate-disk-cache"):
779
try:
780
os.remove(os.path.join("accelerate-disk-cache", filename))
781
except OSError:
782
pass
783
os.makedirs("accelerate-disk-cache", exist_ok=True)
784
if utils.num_shards is not None:
785
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
786
else:
787
num_tensors = len(device_map)
788
print(flush=True)
789
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio())
790
791
with zipfile.ZipFile(f, "r") as z:
792
try:
793
last_storage_key = None
794
f = None
795
current_offset = 0
796
able_to_pin_layers = True
797
if utils.num_shards is not None:
798
utils.current_shard += 1
799
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
800
storage_key = model_dict[key].key
801
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
802
last_storage_key = storage_key
803
if isinstance(f, zipfile.ZipExtFile):
804
f.close()
805
f = z.open(f"archive/data/{storage_key}")
806
current_offset = 0
807
if current_offset != model_dict[key].seek_offset:
808
f.read(model_dict[key].seek_offset - current_offset)
809
current_offset = model_dict[key].seek_offset
810
device = device_map[key]
811
size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1)
812
dtype = model_dict[key].dtype
813
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
814
#print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
815
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
816
# if model_dict[key].dtype is torch.float32:
817
# fp32_model = True
818
if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (use_breakmodel or usegpu) and model_dict[key].dtype is torch.float32:
819
model_dict[key] = model_dict[key].to(torch.float16)
820
if breakmodel.primary_device == "cpu" or (not usegpu and not use_breakmodel and model_dict[key].dtype is torch.float16):
821
model_dict[key] = model_dict[key].to(torch.float32)
822
if device == "shared":
823
model_dict[key] = model_dict[key].to("cpu").detach_()
824
if able_to_pin_layers:
825
try:
826
model_dict[key] = model_dict[key].pin_memory()
827
except:
828
able_to_pin_layers = False
829
elif device == "disk":
830
accelerate.utils.offload_weight(model_dict[key], get_original_key(key), "accelerate-disk-cache", index=utils.offload_index)
831
model_dict[key] = model_dict[key].to("meta")
832
else:
833
model_dict[key] = model_dict[key].to(device)
834
#print("OK", flush=True)
835
current_offset += nbytes
836
utils.bar.update(1)
837
finally:
838
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
839
if utils.offload_index:
840
for name, tensor in utils.named_buffers:
841
if name not in utils.offload_index:
842
accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
843
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
844
utils.bar.close()
845
utils.bar = None
846
lazy_load_callback.nested = False
847
if isinstance(f, zipfile.ZipExtFile):
848
f.close()
849
850
lazy_load_callback.nested = False
851
852
# Since we're using lazy loader, we need to figure out what the model's hidden layers are called
853
with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True, use_accelerate_init_empty_weights=True):
854
try:
855
metamodel = AutoModelForCausalLM.from_config(model_config)
856
except Exception as e:
857
metamodel = GPTNeoForCausalLM.from_config(model_config)
858
utils.layers_module_names = utils.get_layers_module_names(metamodel)
859
utils.module_names = list(metamodel.state_dict().keys())
860
utils.named_buffers = list(metamodel.named_buffers(recurse=True))
861
862
with torch_lazy_loader.use_lazy_torch_load(callback=lazy_load_callback, dematerialized_modules=True):
863
if(os.path.isdir(self.data.ckpt_path)):
864
try:
865
model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
866
except Exception as e:
867
if("out of memory" in traceback.format_exc().lower()):
868
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
869
model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
870
elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))):
871
try:
872
model = AutoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
873
except Exception as e:
874
if("out of memory" in traceback.format_exc().lower()):
875
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
876
model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
877
else:
878
try:
879
model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
880
except Exception as e:
881
if("out of memory" in traceback.format_exc().lower()):
882
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
883
model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
884
885
if(hascuda):
886
if(usegpu):
887
model = model.half().to(gpu_device)
888
elif(use_breakmodel): # Use both RAM and VRAM (breakmodel)
889
move_model_to_devices(model, usegpu, gpu_device)
890
elif(__import__("breakmodel").disk_blocks > 0):
891
move_model_to_devices(model, usegpu, gpu_device)
892
else:
893
model = model.to('cpu').float()
894
elif(__import__("breakmodel").disk_blocks > 0):
895
move_model_to_devices(model, usegpu, gpu_device)
896
else:
897
model.to('cpu').float()
898
899
if step == 0:
900
soft_embeddings = self.get_initial_soft_embeddings(model)
901
else:
902
soft_embeddings = SoftPrompt.from_inputs_embeds(soft_embeddings)
903
model.set_soft_prompt(soft_embeddings)
904
905
steps = self.get_num_sequences() // self.data.gradient_accumulation_steps
906
warmup_steps = max(1, round(steps * self.data.stparams["warmup"]))
907
908
beta1: Optional[float] = self.data.stparams.get("beta1", 0.0)
909
if beta1 == 0.0:
910
beta1 = None
911
optimizer = transformers.Adafactor(
912
params=(model.get_soft_params(),),
913
scale_parameter=False,
914
relative_step=False,
915
warmup_init=False,
916
lr=self.data.stparams["lr"],
917
beta1=beta1,
918
decay_rate=self.data.stparams.get("decay_rate", -0.8),
919
weight_decay=self.data.stparams.get("weight_decay", 0.1),
920
)
921
if step != 0:
922
optimizer.load_state_dict(opt_state)
923
scheduler = transformers.get_cosine_with_hard_restarts_schedule_with_warmup(
924
optimizer=optimizer,
925
num_warmup_steps=warmup_steps,
926
num_training_steps=steps - warmup_steps,
927
num_cycles=(steps - warmup_steps) // self.data.stparams.get("training_steps_per_cycle", 56),
928
)
929
930
torch.cuda.empty_cache()
931
optimizer.state['step'] = step
932
cross_entropy_loss = CrossEntropyLoss()
933
934
def save_mkusp(
935
loss,
936
grad_norm,
937
):
938
with open(self.data.save_file, "wb") as f:
939
torch.save(
940
{
941
"tensor": soft_embeddings.get_inputs_embeds(),
942
"opt_state": optimizer.state_dict(),
943
"step": step,
944
"loss": loss,
945
"grad_norm": grad_norm,
946
},
947
f,
948
)
949
self.save_data()
950
951
bar1 = tqdm(initial=step + 1, total=steps, desc="CURRENT TRAINING STEP")
952
953
while step < steps:
954
step += 1
955
model.train()
956
957
total_loss = total_grad = total_grad_norm = 0
958
959
# Get the next sequences from the dataset
960
block = torch.tensor(np.int32(self.get_batch(step, self.data.gradient_accumulation_steps))).to(model.transformer.wte.weight.device)
961
962
for sequence in tqdm(block, desc="GRADIENT ACCUMULATION", leave=False):
963
# input_ids is the context to the model (without the soft prompt) and labels is what we expect the model to generate (the -100s represent soft prompt tokens for which loss is not calculated)
964
input_ids = sequence[:-1].unsqueeze(0).detach()
965
labels = torch.cat((torch.full((model.get_soft_params().size(0) - 1,), -100, device=sequence.device), sequence), dim=-1).unsqueeze(0).detach()
966
967
# Give the context to the model and compare the model's output logits with the labels to compute the loss
968
logits = model(input_ids=input_ids, labels=input_ids).logits
969
loss: torch.Tensor = cross_entropy_loss(logits.view(-1, model.transformer.wte.weight.size(0)), labels.view(-1))
970
total_loss += loss.detach()
971
972
# Compute the gradient of the loss function and add it to model.get_soft_params().grad (model.get_soft_params().grad += gradient)
973
loss.backward()
974
975
total_grad_norm += torch.linalg.norm(model.get_soft_params().grad.detach() - total_grad)
976
total_grad = model.get_soft_params().grad.detach()
977
978
del input_ids
979
del labels
980
del logits
981
torch.cuda.empty_cache()
982
983
mean_loss = (total_loss / self.data.gradient_accumulation_steps).item()
984
mean_grad_norm = (total_grad_norm / self.data.gradient_accumulation_steps).item()
985
986
# Apply the optimization algorithm using the accumulated gradients, which changes the contents of the soft prompt matrix very slightly to reduce the loss
987
optimizer.step()
988
lr = optimizer.param_groups[0]["lr"]
989
scheduler.step()
990
optimizer.zero_grad()
991
992
# Save checkpoint every few steps
993
if step == 1 or step % self.data.stparams["save_every"] == 0:
994
save_mkusp(mean_loss, mean_grad_norm)
995
996
bar1.set_postfix({"loss": mean_loss, "grad_norm": mean_grad_norm, "learning_rate": lr})
997
bar1.update()
998
999
1000
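# Concrete trainer that reads a pre-tokenized .npy dataset from data.dataset_file and
# supports the "tokens" and "vocab_sample" soft-prompt initialization methods.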
class BasicTrainer(TrainerBase):
1001
class TrainerData(TrainerBase.TrainerData):
1002
def __init__(self):
1003
super().__init__()
1004
self.dataset_file: Optional[str] = None
1005
self.initial_softprompt: Optional[List[int]] = None
1006
1007
data: "BasicTrainer.TrainerData"
1008
1009
def __init__(self, *args, **kwargs):
1010
super().__init__(*args, **kwargs)
1011
self.dataset: Optional[np.ndarray] = None
1012
1013
def startup(self, step: int) -> None:
1014
if self.get_num_sequences() < self.data.gradient_accumulation_steps:
1015
self.raise_configuration_error(
1016
"Your dataset is too small! gradient_accumulation_steps must be less than or equal to the number of sequences.",
1017
code=101,
1018
)
1019
if (
1020
self.data.prompt_method == "tokens"
1021
and step < 0
1022
and self.data.initial_softprompt is None
1023
):
1024
self.raise_configuration_error(
1025
"You have not set an initial soft prompt string.", code=103
1026
)
1027
if self.data.prompt_method == "tokens" and step < 0:
1028
self.data.soft_in_dim = len(self.data.initial_softprompt)
1029
1030
def get_batch(self, step: int, size: int) -> np.ndarray:
1031
return self.dataset[(step - 1) * size : step * size]
1032
1033
def get_num_sequences(self) -> int:
1034
if self.dataset is None:
1035
if self.data.dataset_file is None or not os.path.exists(
1036
self.data.dataset_file
1037
):
1038
self.raise_configuration_error(
1039
f"Dataset file not found at {repr(self.data.dataset_file)}",
1040
code=102,
1041
)
1042
self.dataset = np.load(self.data.dataset_file, mmap_mode="r")
1043
assert self.dataset.ndim >= 2
1044
assert self.dataset.shape[0] >= 2
1045
return self.dataset.shape[0]
1046
1047
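# Builds the initial soft prompt: "vocab_sample" draws soft_in_dim random non-special
# vocabulary embeddings using a seed derived from prompt_seed and the model type,
# while "tokens" embeds the user-supplied initial prompt token ids.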
def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt:
1048
if self.data.prompt_method == "vocab_sample":
1049
rng = np.random.Generator(
1050
np.random.PCG64(
1051
[
1052
self.data.prompt_seed,
1053
int.from_bytes(hashlib.sha256(model.config.model_type.encode("utf8")).digest()[:4], "little"),
1054
]
1055
)
1056
)
1057
tokenizer = self.get_tokenizer()
1058
with tokenizer._kai_no_prefix():
1059
special_tokens = set(
1060
itertools.chain.from_iterable(
1061
tokenizer.encode(str(v))
1062
for v in tokenizer.special_tokens_map_extended.values()
1063
)
1064
)
1065
sample_space = [
1066
k for k in range(model.get_input_embeddings().weight.shape[-2]) if k not in special_tokens
1067
]
1068
sample = rng.choice(sample_space, self.data.soft_in_dim, False)
1069
return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32, device=model.get_input_embeddings().weight.device)))
1070
elif self.data.prompt_method == "tokens":
1071
return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32, device=model.get_input_embeddings().weight.device)))
1072
self.raise_configuration_error(
1073
f"Unknown prompt method {repr(self.data.prompt_method)}", code=104
1074
)
1075
1076
def tokenize_dataset_callback(
1077
self, tokenizer: transformers.PreTrainedTokenizerBase, text: str
1078
) -> List[int]:
1079
if self.data.newlinemode == "s":
1080
text = text.replace("\n", "</s>")
1081
with tokenizer._kai_no_prefix():
1082
return tokenizer.encode(text) + self.data.params["eos_token"]
1083
1084
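# Illustrative usage sketch (not exhaustive; the paths, model id and hyperparameter
# values below are hypothetical and only show how the fields defined above fit
# together):
#
#     trainer = BasicTrainer()
#     trainer.data.ckpt_path = "KoboldAI/fairseq-dense-125M"   # HF model id or local path
#     trainer.get_hf_checkpoint_metadata()                     # fills trainer.data.params
#     trainer.data.newlinemode = trainer.data.params["newlinemode"]
#     trainer.data.save_file = "my_softprompt.mkusp"
#     trainer.data.dataset_file = "dataset.npy"
#     tokenizer = trainer.get_tokenizer()
#     with tokenizer._kai_no_prefix():
#         trainer.data.initial_softprompt = tokenizer.encode("A story about dragons:")
#     trainer.tokenize_dataset("my_corpus.txt", "dataset.npy")
#     trainer.data.gradient_accumulation_steps = 16
#     trainer.data.stparams = {"lr": 3e-5, "warmup": 0.1, "save_every": 50}
#     trainer.train(breakmodel_gpulayers=[-1])   # -1 = put all layers on GPU 0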