GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/minBERT/base_bert.py
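
# This module provides the pretrained-weight loading machinery for minBERT.
# It is adapted from the `PreTrainedModel` / `from_pretrained` implementation
# in Hugging Face transformers (modeling_utils.py), trimmed down for the
# CS224n assignment.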
import re
from torch import device, dtype
from config import BertConfig, PretrainedConfig
from utils import *


class BertPreTrainedModel(nn.Module):
  config_class = BertConfig
  base_model_prefix = "bert"
  _keys_to_ignore_on_load_missing = [r"position_ids"]
  _keys_to_ignore_on_load_unexpected = None

  def __init__(self, config: PretrainedConfig, *inputs, **kwargs):
    super().__init__()
    self.config = config
    self.name_or_path = config.name_or_path

  def init_weights(self):
    # Initialize weights
    self.apply(self._init_weights)

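  # `nn.Module.apply` walks the module tree recursively, so `_init_weights`
  # below is invoked once for every submodule of the model.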
  def _init_weights(self, module):
    """Initialize the weights."""
    if isinstance(module, (nn.Linear, nn.Embedding)):
      # Slightly different from the TF version, which uses truncated_normal for initialization.
      # cf. https://github.com/pytorch/pytorch/pull/5617
      module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
    elif isinstance(module, nn.LayerNorm):
      module.bias.data.zero_()
      module.weight.data.fill_(1.0)
    if isinstance(module, nn.Linear) and module.bias is not None:
      module.bias.data.zero_()

  @property
  def dtype(self) -> dtype:
    return get_parameter_dtype(self)

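  # `from_pretrained` below proceeds in stages: resolve a config, locate and
  # fetch the weights file, instantiate the model, rename legacy / Hugging Face
  # checkpoint keys to this project's attribute names, then recursively copy
  # tensors into each submodule.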
  @classmethod
  def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
    config = kwargs.pop("config", None)
    state_dict = kwargs.pop("state_dict", None)
    cache_dir = kwargs.pop("cache_dir", None)
    force_download = kwargs.pop("force_download", False)
    resume_download = kwargs.pop("resume_download", False)
    proxies = kwargs.pop("proxies", None)
    output_loading_info = kwargs.pop("output_loading_info", False)
    local_files_only = kwargs.pop("local_files_only", False)
    use_auth_token = kwargs.pop("use_auth_token", None)
    revision = kwargs.pop("revision", None)
    mirror = kwargs.pop("mirror", None)

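    # Everything popped above configures the loader itself; whatever remains
    # in `kwargs` is forwarded to the config loading below.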
    # Load config if we don't provide a configuration
    if not isinstance(config, PretrainedConfig):
      config_path = config if config is not None else pretrained_model_name_or_path
      config, model_kwargs = cls.config_class.from_pretrained(
        config_path,
        *model_args,
        cache_dir=cache_dir,
        return_unused_kwargs=True,
        force_download=force_download,
        resume_download=resume_download,
        proxies=proxies,
        local_files_only=local_files_only,
        use_auth_token=use_auth_token,
        revision=revision,
        **kwargs,
      )
    else:
      model_kwargs = kwargs

    # Load model
    if pretrained_model_name_or_path is not None:
      pretrained_model_name_or_path = str(pretrained_model_name_or_path)
      if os.path.isdir(pretrained_model_name_or_path):
        # Load from a PyTorch checkpoint
        archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
      elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
        archive_file = pretrained_model_name_or_path
      else:
        archive_file = hf_bucket_url(
          pretrained_model_name_or_path,
          filename=WEIGHTS_NAME,
          revision=revision,
          mirror=mirror,
        )
      try:
        # Load from URL or cache if already cached
        resolved_archive_file = cached_path(
          archive_file,
          cache_dir=cache_dir,
          force_download=force_download,
          proxies=proxies,
          resume_download=resume_download,
          local_files_only=local_files_only,
          use_auth_token=use_auth_token,
        )
      except EnvironmentError as err:
        # logger.error(err)
        msg = (
          f"Can't load weights for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
          f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
          f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a file named {WEIGHTS_NAME}.\n\n"
        )
        raise EnvironmentError(msg)
    else:
      resolved_archive_file = None

    config.name_or_path = pretrained_model_name_or_path

    # Instantiate model.
    model = cls(config, *model_args, **model_kwargs)

    if state_dict is None:
      try:
        state_dict = torch.load(resolved_archive_file, map_location="cpu")
      except Exception:
        raise OSError(
          f"Unable to load weights from pytorch checkpoint file for '{pretrained_model_name_or_path}' "
          f"at '{resolved_archive_file}'"
        )

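    # Bookkeeping for the load: parameters the model expects but the checkpoint
    # lacks (missing), checkpoint entries with no matching parameter
    # (unexpected), and shape/type errors reported by `_load_from_state_dict`.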
    missing_keys = []
    unexpected_keys = []
    error_msgs = []

    # Convert old format to new format if needed from a PyTorch state_dict
    old_keys = []
    new_keys = []
    m = {'embeddings.word_embeddings': 'word_embedding',
         'embeddings.position_embeddings': 'pos_embedding',
         'embeddings.token_type_embeddings': 'tk_type_embedding',
         'embeddings.LayerNorm': 'embed_layer_norm',
         'embeddings.dropout': 'embed_dropout',
         'encoder.layer': 'bert_layers',
         'pooler.dense': 'pooler_dense',
         'pooler.activation': 'pooler_af',
         'attention.self': 'self_attention',
         'attention.output.dense': 'attention_dense',
         'attention.output.LayerNorm': 'attention_layer_norm',
         'attention.output.dropout': 'attention_dropout',
         'intermediate.dense': 'interm_dense',
         'intermediate.intermediate_act_fn': 'interm_af',
         'output.dense': 'out_dense',
         'output.LayerNorm': 'out_layer_norm',
         'output.dropout': 'out_dropout'}

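    # The map above renames Hugging Face module paths to this project's
    # attribute names; "gamma"/"beta" below are the legacy parameter names that
    # old checkpoints used for LayerNorm weight/bias.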
    for key in state_dict.keys():
      new_key = None
      if "gamma" in key:
        new_key = key.replace("gamma", "weight")
      if "beta" in key:
        new_key = key.replace("beta", "bias")
      for x, y in m.items():
        if new_key is not None:
          _key = new_key
        else:
          _key = key
        if x in key:
          new_key = _key.replace(x, y)
      if new_key:
        old_keys.append(key)
        new_keys.append(new_key)

    for old_key, new_key in zip(old_keys, new_keys):
      # print(old_key, new_key)
      state_dict[new_key] = state_dict.pop(old_key)

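    # For example, with a standard bert-base-uncased checkpoint, a key such as
    # 'bert.encoder.layer.0.attention.output.LayerNorm.weight' (or its legacy
    # '.gamma' spelling) ends up as
    # 'bert.bert_layers.0.attention_layer_norm.weight' after this pass.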
    # copy state_dict so _load_from_state_dict can modify it
    metadata = getattr(state_dict, "_metadata", None)
    state_dict = state_dict.copy()
    if metadata is not None:
      state_dict._metadata = metadata

    your_bert_params = [f"bert.{x[0]}" for x in model.named_parameters()]
    for k in state_dict:
      if k not in your_bert_params and not k.startswith("cls."):
        possible_rename = [x for x in k.split(".")[1:-1] if x in m.values()]
        raise ValueError(f"{k} cannot be reloaded into your model; one or more of the renamed submodules {possible_rename} may not match")

    # PyTorch's `_load_from_state_dict` does not copy parameters in a module's
    # descendants, so we need to apply the function recursively.
    def load(module: nn.Module, prefix=""):
      local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
      module._load_from_state_dict(
        state_dict,
        prefix,
        local_metadata,
        True,  # strict
        missing_keys,
        unexpected_keys,
        error_msgs,
      )
      for name, child in module._modules.items():
        if child is not None:
          load(child, prefix + name + ".")

    # Make sure we are able to load base models as well as derived models (with heads)
    start_prefix = ""
    model_to_load = model
    has_prefix_module = any(s.startswith(cls.base_model_prefix) for s in state_dict.keys())
    if not hasattr(model, cls.base_model_prefix) and has_prefix_module:
      start_prefix = cls.base_model_prefix + "."
    if hasattr(model, cls.base_model_prefix) and not has_prefix_module:
      model_to_load = getattr(model, cls.base_model_prefix)
    load(model_to_load, prefix=start_prefix)

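    # If this class adds a head on top of the base model but the checkpoint
    # only contains base weights, record the head's parameters as missing.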
    if model.__class__.__name__ != model_to_load.__class__.__name__:
      base_model_state_dict = model_to_load.state_dict().keys()
      head_model_state_dict_without_base_prefix = [
        key.split(cls.base_model_prefix + ".")[-1] for key in model.state_dict().keys()
      ]
      missing_keys.extend(head_model_state_dict_without_base_prefix - base_model_state_dict)

    # Some models may have keys that are not in the state by design, removing
    # them before needlessly warning the user.
    if cls._keys_to_ignore_on_load_missing is not None:
      for pat in cls._keys_to_ignore_on_load_missing:
        missing_keys = [k for k in missing_keys if re.search(pat, k) is None]

    if cls._keys_to_ignore_on_load_unexpected is not None:
      for pat in cls._keys_to_ignore_on_load_unexpected:
        unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]

    if len(error_msgs) > 0:
      raise RuntimeError(
        "Error(s) in loading state_dict for {}:\n\t{}".format(
          model.__class__.__name__, "\n\t".join(error_msgs)
        )
      )

    # Set model in evaluation mode to deactivate dropout modules by default
    model.eval()

    if output_loading_info:
      loading_info = {
        "missing_keys": missing_keys,
        "unexpected_keys": unexpected_keys,
        "error_msgs": error_msgs,
      }
      return model, loading_info

    if hasattr(config, "xla_device") and config.xla_device and is_torch_tpu_available():
      import torch_xla.core.xla_model as xm

      model = xm.send_cpu_data_to_device(model, xm.xla_device())
      model.to(xm.xla_device())

    return model
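
# Minimal usage sketch (hypothetical: assumes the assignment's `BertModel`
# subclass in bert.py and network access to huggingface.co for the download):
#
#   from bert import BertModel
#   model = BertModel.from_pretrained('bert-base-uncased')
#   print(model.dtype)  # dtype of the loaded parameters, e.g. torch.float32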