luau/tools/heuristicstat.py

168 lines
5.4 KiB
Python
Raw Normal View History

Sync to upstream/release/600 (#1076) ### What's Changed - Improve readability of unions and intersections by limiting the number of elements of those types that can be presented on a single line (gated under `FFlag::LuauToStringSimpleCompositeTypesSingleLine`) - Adds a new option to the compiler `--record-stats` to record and output compilation statistics - `if...then...else` expressions are now optimized into `AND/OR` form when possible. ### VM - Add a new `buffer` type to Luau based on the [buffer RFC](https://github.com/Roblox/luau/pull/739) and additional C API functions to work with it; this release does not include the library. - Internal C API to work with string buffers has been updated to align with Lua version more closely ### Native Codegen - Added support for new X64 instruction (rev) and new A64 instruction (bswap) in the assembler - Simplified the way numerical loop condition is translated to IR ### New Type Solver - Operator inference now handled by type families - Created a new system called `Type Paths` to explain why subtyping tests fail in order to improve the quality of error messages. - Systematic changes to implement Data Flow analysis in the new solver (`Breadcrumb` removed and replaced with `RefinementKey`) --- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com>
2023-10-21 09:10:30 +08:00
#!/usr/bin/python3
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
import argparse
import json
from collections import Counter
import pandas as pd
## needed for 'to_markdown' method for pandas data frame
import tabulate
def getArgs():
parser = argparse.ArgumentParser(description='Analyze compiler statistics')
parser.add_argument('--bytecode-bin-factor', dest='bytecodeBinFactor',default=10,help='Bytecode bin size as a multiple of 1000 (10 by default)')
parser.add_argument('--block-bin-factor', dest='blockBinFactor',default=1,help='Block bin size as a multiple of 1000 (1 by default)')
parser.add_argument('--block-instruction-bin-factor', dest='blockInstructionBinFactor',default=1,help='Block bin size as a multiple of 1000 (1 by default)')
parser.add_argument('statsFile', help='stats.json file generated by running luau-compile')
args = parser.parse_args()
return args
def readStats(statsFile):
with open(statsFile) as f:
stats = json.load(f)
scripts = []
functionCounts = []
bytecodeLengths = []
blockPreOptCounts = []
blockPostOptCounts = []
maxBlockInstructionCounts = []
for path, fileStat in stats.items():
scripts.append(path)
functionCounts.append(fileStat['lowerStats']['totalFunctions'] - fileStat['lowerStats']['skippedFunctions'])
bytecodeLengths.append(fileStat['bytecode'])
blockPreOptCounts.append(fileStat['lowerStats']['blocksPreOpt'])
blockPostOptCounts.append(fileStat['lowerStats']['blocksPostOpt'])
maxBlockInstructionCounts.append(fileStat['lowerStats']['maxBlockInstructions'])
stats_df = pd.DataFrame({
'Script': scripts,
'FunctionCount': functionCounts,
'BytecodeLength': bytecodeLengths,
'BlockPreOptCount': blockPreOptCounts,
'BlockPostOptCount': blockPostOptCounts,
'MaxBlockInstructionCount': maxBlockInstructionCounts
})
return stats_df
def analyzeBytecodeStats(stats_df, config):
binFactor = config.bytecodeBinFactor
divisor = binFactor * 1000
totalScriptCount = len(stats_df.index)
lengthLabels = []
scriptCounts = []
scriptPercs = []
counter = Counter()
for index, row in stats_df.iterrows():
value = row['BytecodeLength']
factor = int(value / divisor)
counter[factor] += 1
for factor, scriptCount in sorted(counter.items()):
left = factor * binFactor
right = left + binFactor
lengthLabel = '{left}K-{right}K'.format(left=left, right=right)
lengthLabels.append(lengthLabel)
scriptCounts.append(scriptCount)
scriptPerc = round(scriptCount * 100 / totalScriptCount, 1)
scriptPercs.append(scriptPerc)
bcode_df = pd.DataFrame({
'BytecodeLength': lengthLabels,
'ScriptCount': scriptCounts,
'ScriptPerc': scriptPercs
})
return bcode_df
def analyzeBlockStats(stats_df, config, field):
binFactor = config.blockBinFactor
divisor = binFactor * 1000
totalScriptCount = len(stats_df.index)
blockLabels = []
scriptCounts = []
scriptPercs = []
counter = Counter()
for index, row in stats_df.iterrows():
value = row[field]
factor = int(value / divisor)
counter[factor] += 1
for factor, scriptCount in sorted(counter.items()):
left = factor * binFactor
right = left + binFactor
blockLabel = '{left}K-{right}K'.format(left=left, right=right)
blockLabels.append(blockLabel)
scriptCounts.append(scriptCount)
scriptPerc = round((scriptCount * 100) / totalScriptCount, 1)
scriptPercs.append(scriptPerc)
block_df = pd.DataFrame({
field: blockLabels,
'ScriptCount': scriptCounts,
'ScriptPerc': scriptPercs
})
return block_df
def analyzeMaxBlockInstructionStats(stats_df, config):
binFactor = config.blockInstructionBinFactor
divisor = binFactor * 1000
totalScriptCount = len(stats_df.index)
blockLabels = []
scriptCounts = []
scriptPercs = []
counter = Counter()
for index, row in stats_df.iterrows():
value = row['MaxBlockInstructionCount']
factor = int(value / divisor)
counter[factor] += 1
for factor, scriptCount in sorted(counter.items()):
left = factor * binFactor
right = left + binFactor
blockLabel = '{left}K-{right}K'.format(left=left, right=right)
blockLabels.append(blockLabel)
scriptCounts.append(scriptCount)
scriptPerc = round((scriptCount * 100) / totalScriptCount, 1)
scriptPercs.append(scriptPerc)
block_df = pd.DataFrame({
'MaxBlockInstructionCount': blockLabels,
'ScriptCount': scriptCounts,
'ScriptPerc': scriptPercs
})
return block_df
if __name__ == '__main__':
config = getArgs()
stats_df = readStats(config.statsFile)
bcode_df = analyzeBytecodeStats(stats_df, config)
print(bcode_df.to_markdown())
block_df = analyzeBlockStats(stats_df, config, 'BlockPreOptCount')
print(block_df.to_markdown())
block_df = analyzeBlockStats(stats_df, config, 'BlockPostOptCount')
print(block_df.to_markdown())
block_df = analyzeMaxBlockInstructionStats(stats_df, config)
print(block_df.to_markdown())