
GitHub Repository: huggingface/notebooks
Path: blob/main/sagemaker/24_train_bloom_peft_lora/scripts/inference.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def model_fn(model_dir):
    # load model and tokenizer from model_dir
    # device_map="auto" lets accelerate place layers on the available GPU(s);
    # load_in_8bit=True loads the weights 8-bit quantized via bitsandbytes
    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", load_in_8bit=True)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    return model, tokenizer


def predict_fn(data, model_and_tokenizer):
    # unpack model and tokenizer
    model, tokenizer = model_and_tokenizer

    # process input
    inputs = data.pop("inputs", data)
    parameters = data.pop("parameters", None)

    # preprocess
    input_ids = tokenizer(inputs, return_tensors="pt").input_ids.to(model.device)

    # pass inputs with all kwargs in data
    if parameters is not None:
        outputs = model.generate(input_ids, **parameters)
    else:
        outputs = model.generate(input_ids)

    # postprocess the prediction
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return [{"generated_text": prediction}]
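
# --- Optional local smoke test (not part of the original script) ---
# model_fn / predict_fn follow the handler contract of the SageMaker
# Hugging Face Inference Toolkit: model_fn loads artifacts once from the
# model directory, predict_fn handles each request payload. The sketch
# below only illustrates how the two handlers fit together; the "./model"
# path and the example payload are assumptions, not values from this repo.
if __name__ == "__main__":
    model_and_tokenizer = model_fn("./model")
    response = predict_fn(
        {
            "inputs": "Summarize: Amazon SageMaker makes it easy to deploy models.",
            "parameters": {"max_new_tokens": 64, "do_sample": True, "top_p": 0.9},
        },
        model_and_tokenizer,
    )
    print(response)  # -> [{"generated_text": "..."}]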