CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
huggingface

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: huggingface/notebooks
Path: blob/main/examples/idefics/inference.py
Views: 2542
# Demo of batched inference with IDEFICS-9B, which needs about 20GB of GPU memory.
#
# The script loads the checkpoint and its processor, builds four few-shot
# prompts that interleave text with images, generates a continuation for each
# prompt and prints the decoded text.

import torch
from transformers import AutoProcessor, IdeficsForVisionText2Text

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint = "HuggingFaceM4/idefics-9b"
# Alternative checkpoint kept from the original demo (going by its name, a tiny
# randomly-initialised model -- presumably for quick smoke tests; TODO confirm):
# checkpoint = "HuggingFaceM4/tiny-random-idefics"

# Weights are loaded in bfloat16 and then moved to the selected device.
model = IdeficsForVisionText2Text.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
).to(device)
processor = AutoProcessor.from_pretrained(checkpoint)

# Image locations used by the prompts below; named once so that each URL is
# not repeated verbatim in several prompts.
KITTENS_URL = "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg"
DOG_GLASSES_URL = "https://hips.hearstapps.com/hmg-prod/images/dog-puns-1581708208.jpg"
NLVR2_IMAGE1_URL = "https://huggingface.co/datasets/hf-internal-testing/fixtures_nlvr2/raw/main/image1.jpeg"
NLVR2_IMAGE2_URL = "https://huggingface.co/datasets/hf-internal-testing/fixtures_nlvr2/raw/main/image2.jpeg"

# Fetch one image up front: the prompts deliberately mix this pre-fetched
# object with plain URL strings in the image positions.
url = KITTENS_URL
image = processor.image_processor.fetch_images(url)

# Each prompt is a flat list alternating text and images: one worked example
# ("User: <image> Describe this image. / Assistant: <answer>") followed by an
# open "Assistant:" turn for the model to complete.
prompts = [
    [
        "User:",
        image,
        "Describe this image.\nAssistant: An image of two kittens in grass.\n",
        "User:",
        DOG_GLASSES_URL,
        "Describe this image.\nAssistant:",
    ],
    [
        "User:",
        DOG_GLASSES_URL,
        "Describe this image.\nAssistant: An image of a dog wearing funny glasses.\n",
        "User:",
        KITTENS_URL,
        "Describe this image.\nAssistant:",
    ],
    [
        "User:",
        image,
        "Describe this image.\nAssistant: An image of two kittens in grass.\n",
        "User:",
        NLVR2_IMAGE1_URL,
        "Describe this image.\nAssistant:",
    ],
    [
        "User:",
        NLVR2_IMAGE2_URL,
        "Describe this image.\nAssistant: An image of a dog.\n",
        "User:",
        NLVR2_IMAGE1_URL,
        "Describe this image.\nAssistant:",
    ],
]

# batched mode
inputs = processor(prompts, return_tensors="pt").to(device)
# single sample mode
# inputs = processor(prompts[0], return_tensors="pt").to(device)

# NOTE(review): per the transformers generation docs, max_length bounds the
# prompt plus the generated tokens; consider max_new_tokens if only the length
# of the completion should be capped.
generated_ids = model.generate(**inputs, max_length=128)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
for index, text in enumerate(generated_text):
    print(f"{index}:\n{text}\n")