Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
numba
GitHub Repository: numba/llvmlite
Path: blob/main/examples/npm_pipeline.py
1154 views
"""
Example: optimize a module with a module pass manager that has been
pre-populated with passes according to a given optimization level.

Once optimized, the module is executed through the MCJIT bindings.
"""

from ctypes import CFUNCTYPE, c_int, POINTER
import faulthandler
import llvmlite.binding as llvm

import numpy as np

# Have Python print a traceback if the process segfaults
faulthandler.enable()

# Both of these initializers are needed before LLVM can be used
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()

# LLVM IR of the module to optimize and run. It defines `sum`, which adds up
# the i32 values behind the pointer argument. The loop checks its bound only
# after the body has executed, so it always loads at least one element.
strmod = """
; ModuleID = '<string>'
source_filename = "<string>"
target triple = "unknown-unknown-unknown"

define i32 @sum(i32* %.1, i32 %.2) {
.4:
  br label %.5

.5: ; preds = %.5, %.4
  %.8 = phi i32 [ 0, %.4 ], [ %.13, %.5 ]
  %.9 = phi i32 [ 0, %.4 ], [ %.12, %.5 ]
  %.10 = getelementptr i32, i32* %.1, i32 %.8
  %.11 = load i32, i32* %.10, align 4
  %.12 = add i32 %.9, %.11
  %.13 = add i32 %.8, 1
  %.14 = icmp ult i32 %.13, %.2
  br i1 %.14, label %.5, label %.6

.6: ; preds = %.5
  ret i32 %.12
}
"""


module = llvm.parse_assembly(strmod)

print("Module before optimization:\n")
print(module)

# Build a ModulePassManager configured for speed optimization level 3
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
pto = llvm.create_pipeline_tuning_options(speed_level=3)
pb = llvm.create_pass_builder(target_machine, pto)
pm = pb.getModulePassManager()

# Apply the optimization pipeline to the module (modifies it in place)
pm.run(module, pb)

# At O3 the loop has likely been vectorized, so the printed IR is expected to
# be longer and more complex than the input, but faster to run.
print("\nModule after optimization:\n")
print(module)

with llvm.create_mcjit_compiler(module, target_machine) as ee:
    # Emit machine code, then look up the address of the compiled function
    ee.finalize_object()
    cfptr = ee.get_function_address("sum")

    # The generated assembly should show vector instructions as well
    print("\nAssembly code generated from module\n")
    print(target_machine.emit_assembly(module))

    # Wrap the raw address in a ctypes callable: int sum(int *, int)
    int_ptr = POINTER(c_int)
    prototype = CFUNCTYPE(c_int, int_ptr, c_int)
    cfunc = prototype(cfptr)

    # Build an array of integers and pass it to the optimized sum function.
    # The call happens inside the `with` so the engine is still open.
    A = np.arange(10, dtype=np.int32)
    res = cfunc(A.ctypes.data_as(int_ptr), A.size)

    # Show both values; they should be identical
    print(f"Result of executing the optimized function: {res}")
    print(f"Expected result: {A.sum()}")

    # Sanity check
    np.testing.assert_equal(res, A.sum())
    print("Success!")