KoboldAI
GitHub Repository: KoboldAI/KoboldAI-Client
Path: blob/main/prompt_tuner.py
1
import abc
2
import os
3
import sys
4
import math
5
import numpy as np
6
import termcolor
7
import contextlib
8
import traceback
9
import random
10
import zipfile
11
import json
12
import uuid
13
import datetime
14
import base64
15
import pickle
16
import hashlib
17
import itertools
18
import functools
19
import bisect
20
import eventlet
21
import packaging.version
22
import gc
23
import time
24
from tqdm.auto import tqdm
25
import torch
26
import torch.nn.functional as F
27
from torch.nn import Embedding, CrossEntropyLoss
28
import transformers
29
from transformers import __version__ as transformers_version
30
from transformers import AutoTokenizer, GPT2Tokenizer, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils
31
import accelerate
32
import accelerate.utils
33
from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM
34
from mkultra.soft_prompt import SoftPrompt
35
from typing import Dict, List, Optional, TextIO, Union
36
37
import logging
38
logging.getLogger("urllib3").setLevel(logging.ERROR)
39
40
import breakmodel
41
import torch_lazy_loader
42
import utils
43
44
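# Module-level flag: when True, move_model_to_devices() splits the model's layers
# across GPU(s), CPU RAM and disk via breakmodel/accelerate instead of placing the
# whole model on a single device.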
use_breakmodel = True
45
46
47
class colors:
48
PURPLE = '\033[95m'
49
BLUE = '\033[94m'
50
CYAN = '\033[96m'
51
GREEN = '\033[92m'
52
YELLOW = '\033[93m'
53
RED = '\033[91m'
54
END = '\033[0m'
55
UNDERLINE = '\033[4m'
56
57
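# File-like object used as a tqdm output target: echoes progress text to the console
# and, when a socket.io connection is available, forwards it to the KoboldAI web UI
# as a 'model_load_status' message.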
class Send_to_socketio(object):
58
def write(self, bar):
59
print(bar, end="")
60
time.sleep(0.01)
61
try:
62
if utils.emit is not None:
63
utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", "&nbsp;")}, broadcast=True)
64
except:
65
pass
66
67
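# Replaces transformers.utils.hub.http_get with a variant whose tqdm progress bar
# writes to the web UI (via the nested Send_to_socketio class) as well as stdout;
# config.json downloads get no progress bar.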
def patch_transformers_download():
68
global transformers
69
import copy, requests, tqdm, time
70
class Send_to_socketio(object):
71
def write(self, bar):
72
bar = bar.replace("\r", "").replace("\n", "")
73
if bar != "":
74
try:
75
print(bar, end="\r")
76
if utils.emit is not None:
77
utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", "&nbsp;")}, broadcast=True)
78
eventlet.sleep(seconds=0)
79
except:
80
pass
81
def http_get(
82
url: str,
83
temp_file: transformers.utils.hub.BinaryIO,
84
proxies=None,
85
resume_size=0,
86
headers: transformers.utils.hub.Optional[transformers.utils.hub.Dict[str, str]] = None,
87
file_name: transformers.utils.hub.Optional[str] = None,
88
):
89
"""
90
Download remote file. Do not gobble up errors.
91
"""
92
headers = copy.deepcopy(headers)
93
if resume_size > 0:
94
headers["Range"] = f"bytes={resume_size}-"
95
r = requests.get(url, stream=True, proxies=proxies, headers=headers)
96
transformers.utils.hub._raise_for_status(r)
97
content_length = r.headers.get("Content-Length")
98
total = resume_size + int(content_length) if content_length is not None else None
99
# `tqdm` behavior is determined by `utils.logging.is_progress_bar_enabled()`
100
# and can be set using `utils.logging.enable/disable_progress_bar()`
101
if url[-11:] != 'config.json':
102
progress = tqdm.tqdm(
103
unit="B",
104
unit_scale=True,
105
unit_divisor=1024,
106
total=total,
107
initial=resume_size,
108
desc=f"Downloading {file_name}" if file_name is not None else "Downloading",
109
file=Send_to_socketio(),
110
)
111
for chunk in r.iter_content(chunk_size=1024):
112
if chunk: # filter out keep-alive new chunks
113
if url[-11:] != 'config.json':
114
progress.update(len(chunk))
115
temp_file.write(chunk)
116
if url[-11:] != 'config.json':
117
progress.close()
118
119
transformers.utils.hub.http_get = http_get
120
121
122
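# Monkey-patches transformers: routes model downloads through aria2 when available,
# records shard counts for the loading progress bar, and works around known bugs in
# specific transformers versions (XGLM positional embeddings in 4.17.0.dev0,
# OPTForCausalLM lm_head size in 4.19.x).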
def patch_transformers():
123
global transformers
124
125
patch_transformers_download()
126
127
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
128
@classmethod
129
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
130
utils.num_shards = None
131
utils.current_shard = 0
132
utils.from_pretrained_model_name = pretrained_model_name_or_path
133
utils.from_pretrained_index_filename = None
134
utils.from_pretrained_kwargs = kwargs
135
utils.bar = None
136
if utils.args is None or not utils.args.no_aria2:
137
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
138
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
139
if(not hasattr(PreTrainedModel, "_kai_patched")):
140
PreTrainedModel.from_pretrained = new_from_pretrained
141
PreTrainedModel._kai_patched = True
142
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
143
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
144
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
145
utils.num_shards = utils.get_num_shards(index_filename)
146
utils.from_pretrained_index_filename = index_filename
147
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
148
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
149
150
# Some versions of transformers 4.17.0.dev0 are affected by
151
# https://github.com/huggingface/transformers/issues/15736
152
# This is a workaround for those versions of transformers.
153
if(transformers_version == "4.17.0.dev0"):
154
try:
155
from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding
156
except ImportError:
157
pass
158
else:
159
@torch.no_grad()
160
def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0):
161
bsz, seq_len = inputs_embeds.size()[:-1]
162
input_shape = inputs_embeds.size()[:-1]
163
sequence_length = input_shape[1]
164
position_ids = torch.arange(
165
past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
166
).unsqueeze(0).expand(input_shape).contiguous()
167
max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
168
if max_pos > self.weights.size(0):
169
self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx)
170
return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach()
171
XGLMSinusoidalPositionalEmbedding.forward = new_forward
172
173
174
# Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
175
if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
176
try:
177
from transformers import OPTForCausalLM, OPTModel
178
except ImportError:
179
pass
180
else:
181
# This is the same as the original __init__ but with
182
# config.hidden_size
183
# replaced with
184
# config.word_embed_proj_dim
185
def new_init(self, config):
186
super(OPTForCausalLM, self).__init__(config)
187
self.model = OPTModel(config)
188
self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
189
self.post_init()
190
OPTForCausalLM.__init__ = new_init
191
192
193
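# Prints a table showing how many transformer layers are assigned to each GPU, to the
# disk cache and to the CPU, marking the currently selected row.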
def device_list(n_layers, primary=None, selected=None):
194
device_count = torch.cuda.device_count()
195
if(device_count < 2):
196
primary = None
197
gpu_blocks = breakmodel.gpu_blocks + (device_count - len(breakmodel.gpu_blocks))*[0]
198
print(f"{colors.YELLOW} DEVICE ID | LAYERS | DEVICE NAME{colors.END}")
199
for i in range(device_count):
200
name = torch.cuda.get_device_name(i)
201
if(len(name) > 47):
202
name = "..." + name[-44:]
203
row_color = colors.END
204
sep_color = colors.YELLOW
205
print(f"{row_color}{colors.YELLOW + '->' + row_color if i == selected else ' '} {'(primary)' if i == primary else ' '*9} {i:3} {sep_color}|{row_color} {gpu_blocks[i]:3} {sep_color}|{row_color} {name}{colors.END}")
206
row_color = colors.END
207
sep_color = colors.YELLOW
208
print(f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}")
209
print(f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}")
210
211
212
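# Places the model according to the breakmodel settings: either entirely on one device
# (GPU in half precision, or CPU in float32), or layer by layer across GPUs, CPU RAM
# and the accelerate disk cache via breakmodel.dispatch_model_ex. Also points the
# module-level `generator` at model.generate.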
def move_model_to_devices(model, usegpu, gpu_device):
213
global generator
214
215
if(not use_breakmodel):
216
if(usegpu):
217
model = model.half().to(gpu_device)
218
else:
219
model = model.to('cpu').float()
220
generator = model.generate
221
return
222
223
for key, value in model.state_dict().items():
224
target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16
225
if(value.dtype is not target_dtype):
226
accelerate.utils.set_module_tensor_to_device(model, key, target_dtype)
227
disk_blocks = breakmodel.disk_blocks
228
gpu_blocks = breakmodel.gpu_blocks
229
ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks)
230
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
231
device_map = {}
232
for name in utils.layers_module_names:
233
layer = int(name.rsplit(".", 1)[1])
234
device = ("disk" if layer < disk_blocks else "cpu") if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
235
device_map[name] = device
236
for name in utils.get_missing_module_names(model, list(device_map.keys())):
237
device_map[name] = breakmodel.primary_device
238
breakmodel.dispatch_model_ex(model, device_map, main_device=breakmodel.primary_device, offload_buffers=True, offload_dir="accelerate-disk-cache")
239
gc.collect()
240
generator = model.generate
241
return
242
243
244
_PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel]
245
246
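# Shims that expose a model's input-embedding layer under the attribute name
# `transformer.wte`, which is what the mkultra soft-prompt code expects: _WTEDummy
# wraps models that have no `transformer` attribute at all, while _WTEMixin is mixed
# into an existing `transformer` submodule that lacks `wte`.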
class _WTEDummy:
247
def __init__(self, model: transformers.PreTrainedModel):
248
self.model = model
249
250
@property
251
def wte(self: "_WTEDummy"):
252
return self.model.get_input_embeddings()
253
254
@wte.setter
255
def wte(self: "_WTEDummy", v):
256
self.model.set_input_embeddings(v)
257
258
class _WTEMixin:
259
@property
260
def wte(self: Union["_WTEMixin", transformers.PreTrainedModel]):
261
return self.get_input_embeddings()
262
263
@wte.setter
264
def wte(self: Union["_WTEMixin", transformers.PreTrainedModel], v):
265
self.set_input_embeddings(v)
266
267
268
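# Generalizes mkultra's GPT-specific prompt tuning to arbitrary causal LMs:
# from_pretrained() attaches the wte shim, freezes all model parameters and
# initializes the soft prompt; forward() left-pads input_ids with dummy token ids
# whose embeddings are overwritten with the learned soft prompt through a temporary
# hook on Embedding.__call__. The remaining helpers are copied from
# GPTPromptTuningMixin by the loop after the class body.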
class UniversalPromptTuningMixin:
269
@classmethod
270
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
271
model: _PromptTuningPreTrainedModel = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
272
273
if not hasattr(model, "transformer"):
274
model.transformer = _WTEDummy(model)
275
elif not hasattr(model.transformer, "wte"):
276
assert not isinstance(model.transformer, type)
277
model.transformer.__class__ = type("_UniversalPromptTuning" + model.transformer.__class__.__name__, (_WTEMixin, model.transformer.__class__), {})
278
279
model.__class__ = type("_UniversalPromptTuning" + model.__class__.__name__, (UniversalPromptTuningMixin, model.__class__), {})
280
281
for param in model.parameters():
282
param.requires_grad = False
283
model.initialize_soft_prompt()
284
285
return model
286
287
def forward(
288
self: _PromptTuningPreTrainedModel,
289
input_ids: Optional[torch.Tensor] = None,
290
attention_mask: Optional[torch.Tensor] = None,
291
labels: Optional[torch.Tensor] = None,
292
use_cache: Optional[bool] = None,
293
return_dict: Optional[bool] = None,
294
**kwargs,
295
):
296
assert input_ids is not None
297
assert input_ids.ndim == 2
298
299
input_ids = F.pad(input_ids, (self.learned_embedding.size(0), 0, 0, 0), value=self.transformer.wte.weight.size(0) // 2)
300
301
if labels is not None:
302
labels = self._extend_labels(labels)
303
304
if attention_mask is not None:
305
attention_mask = self._extend_attention_mask(attention_mask)
306
307
old_embedding_call = Embedding.__call__
308
model = self
309
310
def new_embedding_call(self, input_ids, *args, **kwargs):
311
inputs_embeds = old_embedding_call(self, input_ids, *args, **kwargs)
312
if model.transformer.wte is self:
313
assert inputs_embeds.ndim == 3
314
inputs_embeds[:, :model.learned_embedding.size(0), :] = model.learned_embedding[None]
315
return inputs_embeds
316
317
Embedding.__call__ = new_embedding_call
318
319
try:
320
return super().forward(
321
input_ids=input_ids,
322
attention_mask=attention_mask,
323
labels=labels,
324
use_cache=use_cache,
325
return_dict=return_dict,
326
)
327
finally:
328
Embedding.__call__ = old_embedding_call
329
330
for k in dir(GPTPromptTuningMixin):
331
v = getattr(GPTPromptTuningMixin, k)
332
_v = getattr(UniversalPromptTuningMixin, k, None)
333
if _v is None or (_v is getattr(object, k, None) and callable(_v) and not isinstance(_v, type)):
334
setattr(UniversalPromptTuningMixin, k, v)
335
336
337
class AutoPromptTuningLM(UniversalPromptTuningMixin, transformers.AutoModelForCausalLM):
338
def __init__(self, config):
339
super().__init__(config)
340
341
342
default_quiet = False
343
344
345
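# Loads a tokenizer for the given model, trying progressively more permissive options
# (slow AutoTokenizer -> fast AutoTokenizer -> GPT2Tokenizer -> stock "gpt2") and
# checking a local models/ folder when present. Also attaches a _kai_no_prefix()
# context manager that temporarily disables BOS tokens and prefix spaces so text can
# be tokenized verbatim.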
def get_tokenizer(model_id, revision=None) -> transformers.PreTrainedTokenizerBase:
346
if(os.path.isdir(model_id)):
347
try:
348
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False)
349
except Exception as e:
350
try:
351
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
352
except Exception as e:
353
try:
354
tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
355
except Exception as e:
356
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
357
elif(os.path.isdir("models/{}".format(model_id.replace('/', '_')))):
358
try:
359
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False)
360
except Exception as e:
361
try:
362
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache")
363
except Exception as e:
364
try:
365
tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache")
366
except Exception as e:
367
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
368
else:
369
try:
370
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False)
371
except Exception as e:
372
try:
373
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
374
except Exception as e:
375
try:
376
tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache")
377
except Exception as e:
378
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")
379
380
@contextlib.contextmanager
381
def _kai_no_prefix():
382
add_bos_token = getattr(tokenizer, "add_bos_token", False)
383
add_prefix_space = getattr(tokenizer, "add_prefix_space", False)
384
tokenizer.add_bos_token = False
385
tokenizer.add_prefix_space = False
386
try:
387
yield
388
finally:
389
tokenizer.add_bos_token = add_bos_token
390
tokenizer.add_prefix_space = add_prefix_space
391
392
tokenizer._kai_no_prefix = _kai_no_prefix
393
return tokenizer
394
395
396
class ConfigurationError(Exception):
397
def __init__(self, msg: str = "Unknown error", code: int = 1, quiet: Optional[bool] = None):
398
if quiet is None:
399
quiet = default_quiet
400
super().__init__(msg)
401
self.code = code
402
self.quiet = quiet
403
404
405
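# Abstract base class for soft-prompt ("prompt tuning") trainers. Subclasses supply
# the dataset plumbing (get_batch, get_num_sequences, tokenize_dataset_callback) and
# the initial soft embeddings; TrainerBase provides dataset tokenization, checkpoint
# export and the training loop.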
class TrainerBase(abc.ABC):
406
@abc.abstractmethod
407
def startup(self, step: int) -> None:
408
...
409
410
@abc.abstractmethod
411
def get_batch(self, step: int, size: int) -> np.ndarray:
412
...
413
414
@abc.abstractmethod
415
def get_num_sequences(self) -> int:
416
...
417
418
@abc.abstractmethod
419
def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt:
420
...
421
422
@abc.abstractmethod
423
def tokenize_dataset_callback(self, tokenizer: transformers.PreTrainedTokenizerBase, text: str) -> List[int]:
424
...
425
426
class TrainerData:
427
def __init__(self):
428
self.__lazy_load_spec: Optional[dict] = None
429
self.model_spec: Optional[dict] = None
430
self.tokenizer_id: Optional[str] = None
431
self.newlinemode: Optional[str] = None
432
self.ckpt_path: Optional[str] = None
433
self.save_file: Optional[str] = None
434
self.params: Optional[dict] = None
435
self.stparams: Optional[dict] = None
436
self.gradient_accumulation_steps = -1
437
self.soft_in_dim = -1
438
self.prompt_method = "tokens"
439
self.prompt_seed = 42
440
441
@property
442
def lazy_load_spec(self):
443
print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr)
444
return self.__lazy_load_spec
445
446
@lazy_load_spec.setter
447
def lazy_load_spec(self, value: Optional[dict]):
448
print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr)
449
self.__lazy_load_spec = value
450
451
@property
452
def kaiming_size(self): # backwards compatibility
453
return self.soft_in_dim
454
455
@kaiming_size.setter
456
def kaiming_size(self, value: int): # backwards compatibility
457
self.prompt_method = "kaiming"
458
self.soft_in_dim = value
459
460
data: TrainerData
461
462
def __init__(self, universe: Optional[int] = None, quiet=False):
463
self.quiet = quiet
464
self.universe = universe
465
self.data = self.TrainerData()
466
self._spmodule: Optional[str] = None
467
if universe is not None:
468
print("WARNING: The `universe` argument of `TrainerBase.__init__` is currently unused", file=sys.stderr)
469
470
def raise_configuration_error(self, msg, **kwargs):
471
if "quiet" not in kwargs:
472
kwargs["quiet"] = self.quiet
473
raise ConfigurationError(msg, **kwargs)
474
475
def _get_model_config(self) -> transformers.configuration_utils.PretrainedConfig:
476
REVISION = None
477
if(os.path.isdir(self.data.ckpt_path)):
478
model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
479
elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))):
480
model_config = AutoConfig.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
481
else:
482
model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
483
return model_config
484
485
def get_hf_checkpoint_metadata(self) -> bool:
486
params = {}
487
model_config = self._get_model_config()
488
params["tokenizer_id"] = self.data.ckpt_path
489
tokenizer = get_tokenizer(self.data.ckpt_path)
490
params["newlinemode"] = params.get(
491
"newlinemode", "s" if model_config.model_type == "xglm" else "n"
492
)
493
params["max_batch_size"] = 2048
494
with tokenizer._kai_no_prefix():
495
params["eos_token"] = (
496
[50259, 50259] if model_config.model_type == "xglm" and model_config.eos_token_id == 50259 else [model_config.eos_token_id]
497
)
498
params["seq"] = 2048
499
self.data.params = params
500
return True
501
502
def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase:
503
return get_tokenizer(self.data.ckpt_path)
504
505
def save_data(self):
506
pass
507
508
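# Converts the training checkpoint (MKUSP file) into a KoboldAI soft prompt: a zip
# archive containing the prompt tensor as tensor.npy plus a meta.json with
# name/author/supported/description fields.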
def export_to_kobold(self, output_file: str, name: str, author: str, supported: str, description: str):
509
try:
510
z = torch.load(self.data.save_file)
511
assert z["step"] > 0
512
assert z["tensor"].ndim == 2 and "opt_state" in z
513
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
514
self.data.soft_in_dim = z["tensor"].shape[0]
515
except AssertionError:
516
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
517
518
tensor = z["tensor"]
519
520
meta = {
521
"name": name,
522
"author": author,
523
"supported": supported,
524
"description": description,
525
}
526
if len(meta["author"].strip()) == 0:
527
meta.pop("author")
528
meta["supported"] = list(map(lambda m: m.strip(), supported.split(",")))
529
530
with zipfile.ZipFile(output_file, "w", compression=zipfile.ZIP_LZMA) as z:
531
with z.open("tensor.npy", "w") as f:
532
np.save(f, tensor.detach().cpu().numpy(), allow_pickle=False)
533
with zipfile.ZipFile(output_file, "a", compression=zipfile.ZIP_STORED) as z:
534
with z.open("meta.json", "w") as f:
535
f.write(json.dumps(meta, indent=2).encode("utf-8"))
536
537
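# Converts the training checkpoint into mkultra's JSON soft prompt format: metadata
# plus the prompt tensor as a base64-encoded pickle.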
def export_to_mkultra(self, output_file: str, soft_prompt_name: str, soft_prompt_description: str):
538
try:
539
z = torch.load(self.data.save_file)
540
assert z["step"] > 0
541
assert z["tensor"].ndim == 2 and "opt_state" in z
542
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
543
self.data.soft_in_dim = z["tensor"].shape[0]
544
_step = z["step"]
545
except AssertionError:
546
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
547
548
tensor = z["tensor"]
549
550
with open(output_file, "w") as f:
551
json.dump(
552
{
553
"metadata": {
554
"step": _step,
555
"loss": float(z["loss"]),
556
"uuid": str(uuid.uuid4()),
557
"name": soft_prompt_name,
558
"description": soft_prompt_description,
559
"epoch": datetime.datetime.now().timestamp(),
560
},
561
"tensor": base64.b64encode(
562
pickle.dumps(
563
tensor.detach().cpu(),
564
protocol=4,
565
),
566
).decode("ascii"),
567
},
568
f,
569
)
570
571
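# Tokenizes a text file (or directory of files) into fixed-length sequences of
# batch_size + 1 tokens, optionally cleaning the text with ftfy, shuffling and
# repeating it for the requested number of epochs, and saves the result as a uint16
# NumPy array for use as the training dataset.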
def tokenize_dataset(
572
self,
573
dataset_path: Union[str, TextIO],
574
output_file: Union[str, TextIO],
575
batch_size=2048,
576
epochs=1,
577
use_ftfy=True,
578
shuffle_seed: Optional[Union[int, float, str, bytes, bytearray]] = 1729,
579
):
580
dataset_path = dataset_path.replace("\\", "/")
581
output_file = output_file.replace("\\", "/")
582
if not isinstance(batch_size, int) or batch_size < 1:
583
self.raise_configuration_error(
584
"batch_size must be an integer greater than zero.", code=9
585
)
586
if (
587
not isinstance(epochs, int) and not isinstance(epochs, float)
588
) or epochs <= 0:
589
self.raise_configuration_error(
590
"epochs must be an int or float greater than zero.", code=10
591
)
592
if isinstance(output_file, str) and output_file.endswith("/"):
593
self.raise_configuration_error(
594
"output_file should be the path to a file, not a directory.", code=11
595
)
596
if isinstance(dataset_path, str) and not os.path.exists(dataset_path):
597
self.raise_configuration_error(
598
"dataset_path is not set to a valid file or directory.", code=12
599
)
600
601
if use_ftfy:
602
import ftfy
603
604
tokenizer = self.get_tokenizer()
605
606
batch_size = min(
607
batch_size,
608
self.data.params["max_batch_size"] - self.data.soft_in_dim,
609
)
610
assert batch_size >= 0
611
print(
612
termcolor.colored(
613
"\nIf you see a warning somewhere below about token indices, ignore it. That warning is normal.\n",
614
"magenta",
615
)
616
)
617
print("Batch size:", batch_size)
618
print(termcolor.colored("Tokenizing your dataset...\n", "magenta"))
619
620
if not isinstance(dataset_path, str):
621
files = [dataset_path]
622
elif os.path.isfile(dataset_path):
623
files = [dataset_path]
624
else:
625
files = sorted(
626
os.path.join(dataset_path, filename)
627
for filename in os.listdir(dataset_path)
628
)
629
if shuffle_seed is not None:
630
random.Random(shuffle_seed).shuffle(files)
631
tokens = []
632
eos = tokenizer.decode(self.data.params["eos_token"])
633
for path in files:
634
if isinstance(path, str):
635
f = open(path)
636
else:
637
f = path
638
try:
639
text = f.read()
640
if use_ftfy:
641
text = ftfy.fix_text(text)
642
text = text.replace("<|endoftext|>", eos)
643
tokens.extend(self.tokenize_dataset_callback(tokenizer, text))
644
finally:
645
if isinstance(path, str):
646
f.close()
647
648
print("Dataset size (in tokens):", len(tokens))
649
if len(tokens) < batch_size + 1:
650
self.raise_configuration_error(
651
"Your dataset is too small! The number of tokens has to be greater than the batch size. Try increasing the epochs.",
652
code=13,
653
)
654
tail = len(tokens) % (batch_size + 1)
655
if tail:
656
print(
657
f"We're removing the last {tail} tokens from your dataset to make the length a multiple of {batch_size+1}."
658
)
659
tokens = tokens[:-tail]
660
661
tokens = np.array(tokens, dtype=np.uint16).reshape((-1, batch_size + 1))
662
sequences_per_epoch = tokens.shape[0]
663
_epochs = math.ceil(epochs)
664
if _epochs > 1:
665
rng = np.random.Generator(np.random.PCG64(1729))
666
tokens = np.concatenate(
667
(
668
tokens,
669
*(rng.permutation(tokens, axis=0) for i in range(_epochs - 1)),
670
),
671
axis=0,
672
)
673
tokens = tokens[: math.ceil(epochs * sequences_per_epoch)]
674
print(f"Total sequences in your dataset: {tokens.shape[0]}")
675
676
if isinstance(output_file, str):
677
f = open(output_file, "wb")
678
else:
679
f = output_file
680
try:
681
np.save(f, tokens)
682
finally:
683
if isinstance(output_file, str):
684
f.close()
685
686
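# Main training entry point: loads (or resumes) the soft prompt and the model,
# optionally splitting it across devices with breakmodel, then optimizes only the
# soft-prompt embedding with Adafactor and a cosine-with-hard-restarts schedule,
# accumulating gradients over data.gradient_accumulation_steps sequences per step and
# checkpointing to data.save_file every stparams["save_every"] steps.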
def train(
687
self,
688
breakmodel_primary_device: Optional[Union[str, int, torch.device]] = None,
689
breakmodel_gpulayers: Optional[List[int]] = None,
690
breakmodel_disklayers = 0,
691
):
692
if breakmodel_gpulayers is None:
693
breakmodel_gpulayers = []
694
if breakmodel_primary_device is None:
695
breakmodel_primary_device = 0 if sum(x if x >= 0 else 1 for x in breakmodel_gpulayers) else "cpu"
696
697
if self.data.params is not None and "max_batch_size" not in self.data.params:
698
self.data.params["max_batch_size"] = 2048
699
700
if not os.path.exists(self.data.save_file):
701
print("We are starting a brand new soft-tuning session.\n")
702
self.startup(step=-1)
703
if self.data.soft_in_dim <= 0:
704
self.raise_configuration_error(
705
"You have not set a soft prompt size.", code=6
706
)
707
step = 0
708
else:
709
# If we're resuming a soft-tuning session, the soft prompt tensor is
710
# already in the save file and we just have to decode it.
711
try:
712
z = torch.load(self.data.save_file)
713
assert z["step"] > 0
714
assert z["tensor"].ndim == 2 and "opt_state" in z
715
assert z["tensor"].shape[0] < self.data.params["max_batch_size"]
716
self.data.soft_in_dim = z["tensor"].shape[0]
717
step = z["step"]
718
opt_state = z["opt_state"]
719
except AssertionError:
720
self.raise_configuration_error("MKUSP file is corrupted.", code=14)
721
print(f"We're resuming a previous soft-tuning session at step {step+1}.\n")
722
self.startup(step=step + 1)
723
soft_embeddings = z["tensor"]
724
725
REVISION = None
726
727
patch_transformers()
728
729
model: _PromptTuningPreTrainedModel
730
731
model_config = self._get_model_config()
732
n_layers = utils.num_layers(model_config)
733
breakmodel_gpulayers = [x if x >= 0 else n_layers for x in breakmodel_gpulayers]
734
735
convert_to_float16 = True
736
hascuda = torch.cuda.is_available()
737
usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers
738
gpu_device = breakmodel_primary_device
739
use_breakmodel = bool(hascuda or breakmodel_disklayers or sum(breakmodel_gpulayers))
740
741
assert len(breakmodel_gpulayers) <= torch.cuda.device_count()
742
assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers
743
744
breakmodel.gpu_blocks = breakmodel_gpulayers
745
breakmodel.disk_blocks = breakmodel_disklayers
746
disk_blocks = breakmodel.disk_blocks
747
gpu_blocks = breakmodel.gpu_blocks
748
ram_blocks = n_layers - sum(gpu_blocks)
749
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
750
751
device_list(ram_blocks, primary=breakmodel.primary_device)
752
753
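# Callback for torch_lazy_loader: as checkpoint shards are read, each tensor is
# materialized directly onto its target device ("shared" pinned CPU RAM, the
# accelerate disk cache, or a GPU chosen from the breakmodel layer counts), converting
# dtypes as needed and updating the loading progress bar.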
def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_):
754
if lazy_load_callback.nested:
755
return
756
lazy_load_callback.nested = True
757
758
device_map: Dict[str, Union[str, int]] = {}
759
760
@functools.lru_cache(maxsize=None)
761
def get_original_key(key):
762
return max((original_key for original_key in utils.module_names if original_key.endswith(key)), key=len)
763
764
for key, value in model_dict.items():
765
original_key = get_original_key(key)
766
if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names):
767
device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not use_breakmodel else breakmodel.primary_device
768
else:
769
layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1])
770
device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not use_breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
771
device_map[key] = device
772
773
if utils.num_shards is None or utils.current_shard == 0:
774
utils.offload_index = {}
775
if os.path.isdir("accelerate-disk-cache"):
776
# Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder
777
# (the folder doesn't contain any subfolders so os.remove will do just fine)
778
for filename in os.listdir("accelerate-disk-cache"):
779
try:
780
os.remove(os.path.join("accelerate-disk-cache", filename))
781
except OSError:
782
pass
783
os.makedirs("accelerate-disk-cache", exist_ok=True)
784
if utils.num_shards is not None:
785
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
786
else:
787
num_tensors = len(device_map)
788
print(flush=True)
789
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio())
790
791
with zipfile.ZipFile(f, "r") as z:
792
try:
793
last_storage_key = None
794
f = None
795
current_offset = 0
796
able_to_pin_layers = True
797
if utils.num_shards is not None:
798
utils.current_shard += 1
799
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
800
storage_key = model_dict[key].key
801
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
802
last_storage_key = storage_key
803
if isinstance(f, zipfile.ZipExtFile):
804
f.close()
805
f = z.open(f"archive/data/{storage_key}")
806
current_offset = 0
807
if current_offset != model_dict[key].seek_offset:
808
f.read(model_dict[key].seek_offset - current_offset)
809
current_offset = model_dict[key].seek_offset
810
device = device_map[key]
811
size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1)
812
dtype = model_dict[key].dtype
813
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
814
#print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
815
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
816
# if model_dict[key].dtype is torch.float32:
817
# fp32_model = True
818
if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (use_breakmodel or usegpu) and model_dict[key].dtype is torch.float32:
819
model_dict[key] = model_dict[key].to(torch.float16)
820
if breakmodel.primary_device == "cpu" or (not usegpu and not use_breakmodel and model_dict[key].dtype is torch.float16):
821
model_dict[key] = model_dict[key].to(torch.float32)
822
if device == "shared":
823
model_dict[key] = model_dict[key].to("cpu").detach_()
824
if able_to_pin_layers:
825
try:
826
model_dict[key] = model_dict[key].pin_memory()
827
except:
828
able_to_pin_layers = False
829
elif device == "disk":
830
accelerate.utils.offload_weight(model_dict[key], get_original_key(key), "accelerate-disk-cache", index=utils.offload_index)
831
model_dict[key] = model_dict[key].to("meta")
832
else:
833
model_dict[key] = model_dict[key].to(device)
834
#print("OK", flush=True)
835
current_offset += nbytes
836
utils.bar.update(1)
837
finally:
838
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
839
if utils.offload_index:
840
for name, tensor in utils.named_buffers:
841
if name not in utils.offload_index:
842
accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
843
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
844
utils.bar.close()
845
utils.bar = None
846
lazy_load_callback.nested = False
847
if isinstance(f, zipfile.ZipExtFile):
848
f.close()
849
850
lazy_load_callback.nested = False
851
852
# Since we're using lazy loader, we need to figure out what the model's hidden layers are called
853
with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True, use_accelerate_init_empty_weights=True):
854
try:
855
metamodel = AutoModelForCausalLM.from_config(model_config)
856
except Exception as e:
857
metamodel = GPTNeoForCausalLM.from_config(model_config)
858
utils.layers_module_names = utils.get_layers_module_names(metamodel)
859
utils.module_names = list(metamodel.state_dict().keys())
860
utils.named_buffers = list(metamodel.named_buffers(recurse=True))
861
862
with torch_lazy_loader.use_lazy_torch_load(callback=lazy_load_callback, dematerialized_modules=True):
863
if(os.path.isdir(self.data.ckpt_path)):
864
try:
865
model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
866
except Exception as e:
867
if("out of memory" in traceback.format_exc().lower()):
868
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
869
model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
870
elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))):
871
try:
872
model = AutoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
873
except Exception as e:
874
if("out of memory" in traceback.format_exc().lower()):
875
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
876
model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache")
877
else:
878
try:
879
model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
880
except Exception as e:
881
if("out of memory" in traceback.format_exc().lower()):
882
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
883
model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache")
884
885
if(hascuda):
886
if(usegpu):
887
model = model.half().to(gpu_device)
888
elif(use_breakmodel): # Use both RAM and VRAM (breakmodel)
889
move_model_to_devices(model, usegpu, gpu_device)
890
elif(__import__("breakmodel").disk_blocks > 0):
891
move_model_to_devices(model, usegpu, gpu_device)
892
else:
893
model = model.to('cpu').float()
894
elif(__import__("breakmodel").disk_blocks > 0):
895
move_model_to_devices(model, usegpu, gpu_device)
896
else:
897
model.to('cpu').float()
898
899
if step == 0:
900
soft_embeddings = self.get_initial_soft_embeddings(model)
901
else:
902
soft_embeddings = SoftPrompt.from_inputs_embeds(soft_embeddings)
903
model.set_soft_prompt(soft_embeddings)
904
905
steps = self.get_num_sequences() // self.data.gradient_accumulation_steps
906
warmup_steps = max(1, round(steps * self.data.stparams["warmup"]))
907
908
beta1: Optional[float] = self.data.stparams.get("beta1", 0.0)
909
if beta1 == 0.0:
910
beta1 = None
911
optimizer = transformers.Adafactor(
912
params=(model.get_soft_params(),),
913
scale_parameter=False,
914
relative_step=False,
915
warmup_init=False,
916
lr=self.data.stparams["lr"],
917
beta1=beta1,
918
decay_rate=self.data.stparams.get("decay_rate", -0.8),
919
weight_decay=self.data.stparams.get("weight_decay", 0.1),
920
)
921
if step != 0:
922
optimizer.load_state_dict(opt_state)
923
scheduler = transformers.get_cosine_with_hard_restarts_schedule_with_warmup(
924
optimizer=optimizer,
925
num_warmup_steps=warmup_steps,
926
num_training_steps=steps - warmup_steps,
927
num_cycles=(steps - warmup_steps) // self.data.stparams.get("training_steps_per_cycle", 56),
928
)
929
930
torch.cuda.empty_cache()
931
optimizer.state['step'] = step
932
cross_entropy_loss = CrossEntropyLoss()
933
934
def save_mkusp(
935
loss,
936
grad_norm,
937
):
938
with open(self.data.save_file, "wb") as f:
939
torch.save(
940
{
941
"tensor": soft_embeddings.get_inputs_embeds(),
942
"opt_state": optimizer.state_dict(),
943
"step": step,
944
"loss": loss,
945
"grad_norm": grad_norm,
946
},
947
f,
948
)
949
self.save_data()
950
951
bar1 = tqdm(initial=step + 1, total=steps, desc="CURRENT TRAINING STEP")
952
953
while step < steps:
954
step += 1
955
model.train()
956
957
total_loss = total_grad = total_grad_norm = 0
958
959
# Get the next sequences from the dataset
960
block = torch.tensor(np.int32(self.get_batch(step, self.data.gradient_accumulation_steps))).to(model.transformer.wte.weight.device)
961
962
for sequence in tqdm(block, desc="GRADIENT ACCUMULATION", leave=False):
963
# input_ids is the context to the model (without the soft prompt) and labels is what we expect the model to generate (the -100s represent soft prompt tokens for which loss is not calculated)
964
input_ids = sequence[:-1].unsqueeze(0).detach()
965
labels = torch.cat((torch.full((model.get_soft_params().size(0) - 1,), -100, device=sequence.device), sequence), dim=-1).unsqueeze(0).detach()
966
967
# Give the context to the model and compare the model's output logits with the labels to compute the loss
968
logits = model(input_ids=input_ids, labels=input_ids).logits
969
loss: torch.Tensor = cross_entropy_loss(logits.view(-1, model.transformer.wte.weight.size(0)), labels.view(-1))
970
total_loss += loss.detach()
971
972
# Compute the gradient of the loss function and add it to model.get_soft_params().grad (model.get_soft_params().grad += gradient)
973
loss.backward()
974
975
total_grad_norm += torch.linalg.norm(model.get_soft_params().grad.detach() - total_grad)
976
total_grad = model.get_soft_params().grad.detach()
977
978
del input_ids
979
del labels
980
del logits
981
torch.cuda.empty_cache()
982
983
mean_loss = (total_loss / self.data.gradient_accumulation_steps).item()
984
mean_grad_norm = (total_grad_norm / self.data.gradient_accumulation_steps).item()
985
986
# Apply the optimization algorithm using the accumulated gradients, which changes the contents of the soft prompt matrix very slightly to reduce the loss
987
optimizer.step()
988
lr = optimizer.param_groups[0]["lr"]
989
scheduler.step()
990
optimizer.zero_grad()
991
992
# Save checkpoint every few steps
993
if step == 1 or step % self.data.stparams["save_every"] == 0:
994
save_mkusp(mean_loss, mean_grad_norm)
995
996
bar1.set_postfix({"loss": mean_loss, "grad_norm": mean_grad_norm, "learning_rate": lr})
997
bar1.update()
998
999
1000
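# Concrete trainer that reads a pre-tokenized .npy dataset from data.dataset_file and
# supports the "tokens" and "vocab_sample" soft-prompt initialization methods.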
class BasicTrainer(TrainerBase):
1001
class TrainerData(TrainerBase.TrainerData):
1002
def __init__(self):
1003
super().__init__()
1004
self.dataset_file: Optional[str] = None
1005
self.initial_softprompt: Optional[List[int]] = None
1006
1007
data: "BasicTrainer.TrainerData"
1008
1009
def __init__(self, *args, **kwargs):
1010
super().__init__(*args, **kwargs)
1011
self.dataset: Optional[np.ndarray] = None
1012
1013
def startup(self, step: int) -> None:
1014
if self.get_num_sequences() < self.data.gradient_accumulation_steps:
1015
self.raise_configuration_error(
1016
"Your dataset is too small! gradient_accumulation_steps must be less than or equal to the number of sequences.",
1017
code=101,
1018
)
1019
if (
1020
self.data.prompt_method == "tokens"
1021
and step < 0
1022
and self.data.initial_softprompt is None
1023
):
1024
self.raise_configuration_error(
1025
"You have not set an initial soft prompt string.", code=103
1026
)
1027
if self.data.prompt_method == "tokens" and step < 0:
1028
self.data.soft_in_dim = len(self.data.initial_softprompt)
1029
1030
def get_batch(self, step: int, size: int) -> np.ndarray:
1031
return self.dataset[(step - 1) * size : step * size]
1032
1033
def get_num_sequences(self) -> int:
1034
if self.dataset is None:
1035
if self.data.dataset_file is None or not os.path.exists(
1036
self.data.dataset_file
1037
):
1038
self.raise_configuration_error(
1039
f"Dataset file not found at {repr(self.data.dataset_file)}",
1040
code=102,
1041
)
1042
self.dataset = np.load(self.data.dataset_file, mmap_mode="r")
1043
assert self.dataset.ndim >= 2
1044
assert self.dataset.shape[0] >= 2
1045
return self.dataset.shape[0]
1046
1047
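# Builds the initial soft prompt: "vocab_sample" draws soft_in_dim random non-special
# vocabulary embeddings using a seed derived from prompt_seed and the model type,
# while "tokens" embeds the user-supplied initial prompt token ids.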
def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt:
1048
if self.data.prompt_method == "vocab_sample":
1049
rng = np.random.Generator(
1050
np.random.PCG64(
1051
[
1052
self.data.prompt_seed,
1053
int.from_bytes(hashlib.sha256(model.config.model_type.encode("utf8")).digest()[:4], "little"),
1054
]
1055
)
1056
)
1057
tokenizer = self.get_tokenizer()
1058
with tokenizer._kai_no_prefix():
1059
special_tokens = set(
1060
itertools.chain.from_iterable(
1061
tokenizer.encode(str(v))
1062
for v in tokenizer.special_tokens_map_extended.values()
1063
)
1064
)
1065
sample_space = [
1066
k for k in range(model.get_input_embeddings().weight.shape[-2]) if k not in special_tokens
1067
]
1068
sample = rng.choice(sample_space, self.data.soft_in_dim, False)
1069
return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32, device=model.get_input_embeddings().weight.device)))
1070
elif self.data.prompt_method == "tokens":
1071
return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32, device=model.get_input_embeddings().weight.device)))
1072
self.raise_configuration_error(
1073
f"Unknown prompt method {repr(self.data.prompt_method)}", code=104
1074
)
1075
1076
def tokenize_dataset_callback(
1077
self, tokenizer: transformers.PreTrainedTokenizerBase, text: str
1078
) -> List[int]:
1079
if self.data.newlinemode == "s":
1080
text = text.replace("\n", "</s>")
1081
with tokenizer._kai_no_prefix():
1082
return tokenizer.encode(text) + self.data.params["eos_token"]
1083
1084
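# Illustrative usage sketch (not exhaustive; the paths, model id and hyperparameter
# values below are hypothetical and only show how the fields defined above fit
# together):
#
#     trainer = BasicTrainer()
#     trainer.data.ckpt_path = "KoboldAI/fairseq-dense-125M"   # HF model id or local path
#     trainer.get_hf_checkpoint_metadata()                     # fills trainer.data.params
#     trainer.data.newlinemode = trainer.data.params["newlinemode"]
#     trainer.data.save_file = "my_softprompt.mkusp"
#     trainer.data.dataset_file = "dataset.npy"
#     tokenizer = trainer.get_tokenizer()
#     with tokenizer._kai_no_prefix():
#         trainer.data.initial_softprompt = tokenizer.encode("A story about dragons:")
#     trainer.tokenize_dataset("my_corpus.txt", "dataset.npy")
#     trainer.data.gradient_accumulation_steps = 16
#     trainer.data.stparams = {"lr": 3e-5, "warmup": 0.1, "save_every": 50}
#     trainer.train(breakmodel_gpulayers=[-1])   # -1 = put all layers on GPU 0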