Path: blob/master/modules/sd_hijack_open_clip.py
import open_clip.tokenizer
import torch

from modules import sd_hijack_clip, devices
from modules.shared import opts

# module-level SimpleTokenizer instance shared by all open_clip models
tokenizer = open_clip.tokenizer._tokenizer


class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        # ',</w>' is how the OpenCLIP BPE vocabulary spells a word-final comma
        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        self.id_start = tokenizer.encoder["<start_of_text>"]
        self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_pad = 0  # OpenCLIP pads with zeros rather than with the end token

    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [tokenizer.encode(text) for text in texts]

        return tokenized

    def encode_with_transformers(self, tokens):
        # set self.wrapped.layer_idx here according to opts.CLIP_stop_at_last_layers
        z = self.wrapped.encode_with_transformer(tokens)

        return z

    def encode_embedding_init_text(self, init_text, nvpt):
        # look up raw token embeddings for the text used to initialize a new
        # textual-inversion embedding
        ids = tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)

        return embedded


class FrozenOpenCLIPEmbedder2WithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    # variant for embedders whose encode_with_transformer returns a dict of
    # per-layer outputs plus an optional pooled embedding, instead of a tensor

    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        self.id_start = tokenizer.encoder["<start_of_text>"]
        self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_pad = 0

    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [tokenizer.encode(text) for text in texts]

        return tokenized

    def encode_with_transformers(self, tokens):
        d = self.wrapped.encode_with_transformer(tokens)
        z = d[self.wrapped.layer]  # select the hidden state for the configured layer

        pooled = d.get("pooled")
        if pooled is not None:
            z.pooled = pooled  # stash the pooled embedding on the tensor for later use

        return z

    def encode_embedding_init_text(self, init_text, nvpt):
        ids = tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        # the embedding table may live on a different device than devices.device,
        # so move the ids to wherever its weights actually are
        embedded = self.wrapped.model.token_embedding.wrapped(ids.to(self.wrapped.model.token_embedding.wrapped.weight.device)).squeeze(0)

        return embedded
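

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). In the webui,
# modules.sd_hijack replaces the loaded model's OpenCLIP text encoder with one
# of the wrapper classes above; the hypothetical helper below only shows that
# wiring. `model` stands in for the loaded Stable Diffusion model and `hijack`
# for the webui's model-hijack object passed to the wrapper constructors.
def apply_open_clip_hijack(model, hijack):
    """Hypothetical helper: wrap the model's text encoder so prompt parsing
    (emphasis syntax, textual-inversion embeddings) is handled by the base
    class while the wrapped encoder still runs the transformer itself."""
    model.cond_stage_model = FrozenOpenCLIPEmbedderWithCustomWords(model.cond_stage_model, hijack)
    return model.cond_stage_model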