CoCalc -- heuristicstat.py

GitHub Repository: Roblox/luau
Path: blob/master/tools/heuristicstat.py
²⁷²³ views
1
#!/usr/bin/python3
2
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
3

4
import argparse
5
import json
6
from collections import Counter
7
import pandas as pd
8
# tabulate is not referenced directly, but pandas' DataFrame.to_markdown() requires it to be installed.
9
import tabulate
10

11

12
def getArgs(argv=None):
    """Parse the command-line options of the statistics analyzer.

    The three bin-factor options are parsed as positive integers. Without an
    explicit type argparse would hand back strings for values given on the
    command line, and the later ``binFactor * 1000`` would repeat the string
    instead of multiplying it.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv``.

    Returns:
        The argparse namespace with ``bytecodeBinFactor``, ``blockBinFactor``,
        ``blockInstructionBinFactor`` and ``statsFile`` attributes.
    """
    def positiveInt(text):
        # A zero or negative factor would make the bin divisor unusable.
        value = int(text)
        if value <= 0:
            raise argparse.ArgumentTypeError('must be a positive integer, got {}'.format(text))
        return value

    parser = argparse.ArgumentParser(description='Analyze compiler statistics')
    parser.add_argument(
        '--bytecode-bin-factor', dest='bytecodeBinFactor', type=positiveInt, default=10,
        help='Bytecode bin size as a multiple of 1000 (10 by default)')
    parser.add_argument(
        '--block-bin-factor', dest='blockBinFactor', type=positiveInt, default=1,
        help='Block bin size as a multiple of 1000 (1 by default)')
    parser.add_argument(
        '--block-instruction-bin-factor', dest='blockInstructionBinFactor', type=positiveInt, default=1,
        help='Block instruction bin size as a multiple of 1000 (1 by default)')
    parser.add_argument('statsFile', help='stats.json file generated by running luau-compile')
    return parser.parse_args(argv)
20

21
def readStats(statsFile):
    """Load a compiler statistics JSON file into a per-script DataFrame.

    Args:
        statsFile: Path to a JSON file whose top level maps a script path to
            that script's statistics. Each entry is read for its 'bytecode'
            value and for the 'totalFunctions', 'skippedFunctions',
            'blocksPreOpt', 'blocksPostOpt' and 'maxBlockInstructions' values
            under 'lowerStats'.

    Returns:
        A DataFrame with one row per script and the columns 'Script',
        'FunctionCount' (total minus skipped functions), 'BytecodeLength',
        'BlockPreOptCount', 'BlockPostOptCount' and 'MaxBlockInstructionCount'.

    Raises:
        KeyError: If an entry lacks one of the keys listed above.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    # The file is only needed for parsing, so it is closed right afterwards.
    with open(statsFile, encoding='utf-8') as f:
        stats = json.load(f)

    columns = [
        'Script',
        'FunctionCount',
        'BytecodeLength',
        'BlockPreOptCount',
        'BlockPostOptCount',
        'MaxBlockInstructionCount',
    ]

    rows = []
    for path, fileStat in stats.items():
        lowerStats = fileStat['lowerStats']
        rows.append({
            'Script': path,
            'FunctionCount': lowerStats['totalFunctions'] - lowerStats['skippedFunctions'],
            'BytecodeLength': fileStat['bytecode'],
            'BlockPreOptCount': lowerStats['blocksPreOpt'],
            'BlockPostOptCount': lowerStats['blocksPostOpt'],
            'MaxBlockInstructionCount': lowerStats['maxBlockInstructions'],
        })

    # Passing the column list keeps the column set and order even when the
    # stats file contains no scripts.
    return pd.DataFrame(rows, columns=columns)
50

51

52
def analyzeBytecodeStats(stats_df, config):
    """Build a histogram of scripts grouped by bytecode length.

    Each 'BytecodeLength' value is assigned to bin ``int(value / divisor)``
    where ``divisor`` is ``config.bytecodeBinFactor * 1000``.

    Args:
        stats_df: DataFrame with a 'BytecodeLength' column, one row per script.
        config: Object exposing a ``bytecodeBinFactor`` attribute.

    Returns:
        A DataFrame with one row per non-empty bin, in ascending bin order:
        'BytecodeLength' holds the '<left>K-<right>K' bin label, 'ScriptCount'
        the number of scripts in the bin and 'ScriptPerc' that count as a
        percentage of all rows, rounded to one decimal place.
    """
    binFactor = config.bytecodeBinFactor
    divisor = binFactor * 1000
    totalScriptCount = len(stats_df.index)

    # Iterate the single column that is needed; iterrows() would build a
    # Series for every row just to read one value out of it.
    counter = Counter(int(value / divisor) for value in stats_df['BytecodeLength'])
    bins = sorted(counter.items())

    # With no rows there are no bins, so totalScriptCount is never used as a
    # divisor when it is zero.
    return pd.DataFrame({
        'BytecodeLength': [
            '{left}K-{right}K'.format(left=factor * binFactor, right=factor * binFactor + binFactor)
            for factor, _ in bins
        ],
        'ScriptCount': [scriptCount for _, scriptCount in bins],
        'ScriptPerc': [round(scriptCount * 100 / totalScriptCount, 1) for _, scriptCount in bins],
    })
84

85

86
def analyzeBlockStats(stats_df, config, field):
    """Build a histogram of scripts grouped by the block count in ``field``.

    Each value of the ``field`` column is assigned to bin
    ``int(value / divisor)`` where ``divisor`` is
    ``config.blockBinFactor * 1000``.

    Args:
        stats_df: DataFrame containing the column named by ``field``, one row
            per script.
        config: Object exposing a ``blockBinFactor`` attribute.
        field: Name of the column to bin; it is also used as the name of the
            label column in the result.

    Returns:
        A DataFrame with one row per non-empty bin, in ascending bin order:
        the ``field`` column holds the '<left>K-<right>K' bin label,
        'ScriptCount' the number of scripts in the bin and 'ScriptPerc' that
        count as a percentage of all rows, rounded to one decimal place.
    """
    binFactor = config.blockBinFactor
    divisor = binFactor * 1000
    totalScriptCount = len(stats_df.index)

    # Iterate the single column that is needed; iterrows() would build a
    # Series for every row just to read one value out of it.
    counter = Counter(int(value / divisor) for value in stats_df[field])
    bins = sorted(counter.items())

    # With no rows there are no bins, so totalScriptCount is never used as a
    # divisor when it is zero.
    return pd.DataFrame({
        field: [
            '{left}K-{right}K'.format(left=factor * binFactor, right=factor * binFactor + binFactor)
            for factor, _ in bins
        ],
        'ScriptCount': [scriptCount for _, scriptCount in bins],
        'ScriptPerc': [round((scriptCount * 100) / totalScriptCount, 1) for _, scriptCount in bins],
    })
118

119
def analyzeMaxBlockInstructionStats(stats_df, config):
    """Build a histogram of scripts grouped by 'MaxBlockInstructionCount'.

    Each 'MaxBlockInstructionCount' value is assigned to bin
    ``int(value / divisor)`` where ``divisor`` is
    ``config.blockInstructionBinFactor * 1000``.

    Args:
        stats_df: DataFrame with a 'MaxBlockInstructionCount' column, one row
            per script.
        config: Object exposing a ``blockInstructionBinFactor`` attribute.

    Returns:
        A DataFrame with one row per non-empty bin, in ascending bin order:
        'MaxBlockInstructionCount' holds the '<left>K-<right>K' bin label,
        'ScriptCount' the number of scripts in the bin and 'ScriptPerc' that
        count as a percentage of all rows, rounded to one decimal place.
    """
    binFactor = config.blockInstructionBinFactor
    divisor = binFactor * 1000
    totalScriptCount = len(stats_df.index)

    # Iterate the single column that is needed; iterrows() would build a
    # Series for every row just to read one value out of it.
    counter = Counter(int(value / divisor) for value in stats_df['MaxBlockInstructionCount'])
    bins = sorted(counter.items())

    # With no rows there are no bins, so totalScriptCount is never used as a
    # divisor when it is zero.
    return pd.DataFrame({
        'MaxBlockInstructionCount': [
            '{left}K-{right}K'.format(left=factor * binFactor, right=factor * binFactor + binFactor)
            for factor, _ in bins
        ],
        'ScriptCount': [scriptCount for _, scriptCount in bins],
        'ScriptPerc': [round((scriptCount * 100) / totalScriptCount, 1) for _, scriptCount in bins],
    })
151

152
if __name__ == '__main__':
    # Script entry point: parse the options, load the stats file and print
    # one markdown table per analysis.
    config = getArgs()
    stats_df = readStats(config.statsFile)

    # Each analysis is wrapped in a callable so that it runs immediately
    # before its own table is printed, in the order listed here.
    analyses = (
        lambda: analyzeBytecodeStats(stats_df, config),
        lambda: analyzeBlockStats(stats_df, config, 'BlockPreOptCount'),
        lambda: analyzeBlockStats(stats_df, config, 'BlockPostOptCount'),
        lambda: analyzeMaxBlockInstructionStats(stats_df, config),
    )

    for runAnalysis in analyses:
        print(runAnalysis().to_markdown())
168

169
Product

Resources

Company