GitHub Repository: huggingface/notebooks
Path: blob/main/examples/idefics/inference_4bit.py

# This demo runs inference with IDEFICS-9B using 4-bit quantization, which
# needs about 7 GB of GPU memory and therefore fits even on a free Google Colab GPU.

import torch
from transformers import IdeficsForVisionText2Text, AutoProcessor, BitsAndBytesConfig

device = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint = "HuggingFaceM4/idefics-9b"
# checkpoint = "HuggingFaceM4/tiny-random-idefics"
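# (the alternative checkpoint above is a tiny random-weight model, handy for
# smoke-testing the pipeline without downloading the full 9B weights)
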
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
)
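# Optional knobs if you want to trade a little compute for quality or memory;
# both are documented BitsAndBytesConfig parameters, left at their defaults here:
#   bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True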
model = IdeficsForVisionText2Text.from_pretrained(
    checkpoint, quantization_config=quantization_config, device_map="auto"
)
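# Optional sanity check; assumes a transformers version that provides
# PreTrainedModel.get_memory_footprint(). With 4-bit weights the 9B model
# should come in around 5-6 GB, versus roughly 36 GB in full fp32.
print(f"model footprint: {model.get_memory_footprint() / 1024**3:.1f} GiB")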
processor = AutoProcessor.from_pretrained(checkpoint)

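# IDEFICS prompts interleave text and images: plain strings are treated as
# text, while URL strings (or PIL images) are fetched and encoded as image
# inputs by the processor.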
prompts = [
    "Instruction: provide an answer to the question. Use the image to answer.\n",
    "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg",
    "Question: What's in the picture? Answer: \n",
]

inputs = processor(prompts, return_tensors="pt").to(device)
generated_ids = model.generate(**inputs, max_length=150)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
print(generated_text[0])
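
# Optional variant (the pattern appears in the IDEFICS model docs): ban the
# special image-placeholder tokens so the generated answer cannot contain
# fake image slots. The token strings below follow the IDEFICS tokenizer;
# adjust them if your checkpoint differs.
bad_words_ids = processor.tokenizer(
    ["<image>", "<fake_token_around_image>"], add_special_tokens=False
).input_ids
generated_ids = model.generate(**inputs, bad_words_ids=bad_words_ids, max_length=150)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])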