CoCalc -- npm_pipeline.py

GitHub Repository: numba/llvmlite
Path: blob/main/examples/npm_pipeline.py
1154 views
"""
Optimize an LLVM module with a module pass manager that has been
pre-populated with the passes for a chosen optimization level.

Once optimized, the module is executed through the MCJIT bindings.
"""

from ctypes import CFUNCTYPE, c_int, POINTER
import faulthandler
import llvmlite.binding as llvm

import numpy as np

# If something segfaults, have Python dump a traceback.
faulthandler.enable()

# Set up the native target and its assembly printer before using LLVM.
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()

# Textual LLVM IR for the module that gets optimized and executed.
# @sum takes an i32 pointer and an i32 count and accumulates the loaded
# elements in a loop. The loop body runs before the bound is compared, so
# the first element is always read.
strmod = """
; ModuleID = '<string>'
source_filename = "<string>"
target triple = "unknown-unknown-unknown"

define i32 @sum(i32* %.1, i32 %.2) {
.4:
  br label %.5

.5:                                               ; preds = %.5, %.4
  %.8 = phi i32 [ 0, %.4 ], [ %.13, %.5 ]
  %.9 = phi i32 [ 0, %.4 ], [ %.12, %.5 ]
  %.10 = getelementptr i32, i32* %.1, i32 %.8
  %.11 = load i32, i32* %.10, align 4
  %.12 = add i32 %.9, %.11
  %.13 = add i32 %.8, 1
  %.14 = icmp ult i32 %.13, %.2
  br i1 %.14, label %.5, label %.6

.6:                                               ; preds = %.5
  ret i32 %.12
}
"""


module = llvm.parse_assembly(strmod)

print("Module before optimization:\n")
print(module)

# Build a ModulePassManager configured for speed optimization level 3.
target = llvm.Target.from_default_triple()
target_machine = target.create_target_machine()
pto = llvm.create_pipeline_tuning_options(speed_level=3)
pb = llvm.create_pass_builder(target_machine, pto)
pm = pb.getModulePassManager()

# Apply the optimization pipeline to the module.
pm.run(module, pb)

# At O3 the loop has probably been vectorized, so the IR printed below is
# expected to be longer and more complex, but faster to execute.
print("\nModule after optimization:\n")
print(module)

with llvm.create_mcjit_compiler(module, target_machine) as ee:
    # Emit machine code and look up the address of the compiled function.
    ee.finalize_object()
    cfptr = ee.get_function_address("sum")

    # Vector instructions should show up in the generated assembly as well.
    print("\nAssembly code generated from module\n")
    print(target_machine.emit_assembly(module))

    # Wrap the raw address in a ctypes callable matching @sum's signature,
    # then invoke it on an array of integers.
    int_ptr = POINTER(c_int)
    sum_prototype = CFUNCTYPE(c_int, int_ptr, c_int)
    cfunc = sum_prototype(cfptr)
    A = np.arange(10, dtype=np.int32)
    res = cfunc(A.ctypes.data_as(int_ptr), A.size)

    # Show the JIT result next to NumPy's; the two should match.
    expected = A.sum()
    print(f"Result of executing the optimized function: {res}")
    print(f"Expected result: {expected}")

    # Sanity check
    np.testing.assert_equal(res, expected)
    print("Success!")
Product

Resources

Company