Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
labmlai
GitHub Repository: labmlai/annotated_deep_learning_paper_implementations
Path: blob/master/labml_nn/neox/tokenizer.py
4918 views
1
"""
---
title: GPT-NeoX Tokenizer
summary: >
    Loads the GPT-NeoX tokenizer
---

# GPT-NeoX Tokenizer

This initializes a Hugging Face tokenizer from the downloaded vocabulary.
"""
12
13
from tokenizers import Tokenizer
14
15
from labml import lab, monit
16
17
18
@monit.func('Load NeoX Tokenizer')
def get_tokenizer() -> Tokenizer:
    """
    ### Load NeoX Tokenizer

    Builds the path `neox/slim_weights/20B_tokenizer.json` underneath
    `lab.get_data_path()` and passes it to `Tokenizer.from_file`.

    :return: the tokenizer loaded from that file
    """
    # Directory holding the tokenizer definition, relative to the lab data path
    weights_dir = lab.get_data_path() / 'neox' / 'slim_weights'

    # `Tokenizer.from_file` is given the location as a plain string
    return Tokenizer.from_file(str(weights_dir / '20B_tokenizer.json'))
29
30