luau/tools/heuristicstat.py

#!/usr/bin/python3
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details

import argparse
import json
from collections import Counter
import pandas as pd
## needed for 'to_markdown' method for pandas data frame
import tabulate


def getArgs(argv=None):
    """Parse the command-line options of the statistics analyzer.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process arguments, which is
            what the script entry point relies on.

    Returns:
        An ``argparse.Namespace`` with the attributes ``bytecodeBinFactor``,
        ``blockBinFactor``, ``blockInstructionBinFactor`` (positive integers)
        and ``statsFile`` (the path given on the command line).

    Invalid options are reported by argparse, which prints a usage message
    and exits the process.
    """
    def positiveInt(text):
        # Without an explicit converter argparse hands back the raw string,
        # and the bin factor is later multiplied by 1000 and used as a divisor.
        # A ValueError raised by int() is turned into a usage error by argparse.
        value = int(text)
        if value <= 0:
            raise argparse.ArgumentTypeError('bin factor must be a positive integer, got {0}'.format(text))
        return value

    parser = argparse.ArgumentParser(description='Analyze compiler statistics')
    parser.add_argument('--bytecode-bin-factor', dest='bytecodeBinFactor', type=positiveInt, default=10, help='Bytecode bin size as a multiple of 1000 (10 by default)')
    parser.add_argument('--block-bin-factor', dest='blockBinFactor', type=positiveInt, default=1, help='Block bin size as a multiple of 1000 (1 by default)')
    parser.add_argument('--block-instruction-bin-factor', dest='blockInstructionBinFactor', type=positiveInt, default=1, help='Max block instruction count bin size as a multiple of 1000 (1 by default)')
    parser.add_argument('statsFile', help='stats.json file generated by running luau-compile')
    return parser.parse_args(argv)

def readStats(statsFile):
    """Load per-script compiler statistics from a JSON file into a data frame.

    The JSON document is expected to be an object keyed by script path; each
    value must provide 'bytecode' and a 'lowerStats' object with the keys
    'totalFunctions', 'skippedFunctions', 'blocksPreOpt', 'blocksPostOpt' and
    'maxBlockInstructions'. A missing key raises KeyError.

    Args:
        statsFile: Path of the JSON statistics file to read.

    Returns:
        A pandas DataFrame with one row per script and the columns 'Script',
        'FunctionCount', 'BytecodeLength', 'BlockPreOptCount',
        'BlockPostOptCount' and 'MaxBlockInstructionCount'.
    """
    with open(statsFile) as statsHandle:
        perScript = json.load(statsHandle)

    # Column name -> values, filled row by row in the order of the JSON object.
    columns = {
        'Script': [],
        'FunctionCount': [],
        'BytecodeLength': [],
        'BlockPreOptCount': [],
        'BlockPostOptCount': [],
        'MaxBlockInstructionCount': [],
    }

    for scriptPath, entry in perScript.items():
        lowered = entry['lowerStats']
        columns['Script'].append(scriptPath)
        # Only functions that were not skipped are counted.
        columns['FunctionCount'].append(lowered['totalFunctions'] - lowered['skippedFunctions'])
        columns['BytecodeLength'].append(entry['bytecode'])
        columns['BlockPreOptCount'].append(lowered['blocksPreOpt'])
        columns['BlockPostOptCount'].append(lowered['blocksPostOpt'])
        columns['MaxBlockInstructionCount'].append(lowered['maxBlockInstructions'])

    return pd.DataFrame(columns)


def _binScriptCounts(stats_df, field, binFactor):
    """Count scripts per bin of stats_df[field], with bins binFactor * 1000 wide.

    Args:
        stats_df: Data frame with one row per script and a numeric column
            named `field`.
        field: Name of the column to bin; also used as the name of the label
            column in the result.
        binFactor: Bin width as a multiple of 1000.

    Returns:
        A pandas DataFrame with one row per non-empty bin, ordered by bin,
        and the columns `field` (a label such as '10K-20K'), 'ScriptCount'
        and 'ScriptPerc' (share of all scripts in percent, rounded to one
        decimal place).
    """
    divisor = binFactor * 1000
    totalScriptCount = len(stats_df.index)

    # Truncating the quotient gives the index of the bin the value falls into.
    # Only one column is needed, so iterate it directly instead of every row.
    counter = Counter(int(value / divisor) for value in stats_df[field])

    labels = []
    scriptCounts = []
    scriptPercs = []

    for factor, scriptCount in sorted(counter.items()):
        left = factor * binFactor
        right = left + binFactor
        labels.append('{left}K-{right}K'.format(left=left, right=right))
        scriptCounts.append(scriptCount)
        scriptPercs.append(round(scriptCount * 100 / totalScriptCount, 1))

    return pd.DataFrame({
        field: labels,
        'ScriptCount': scriptCounts,
        'ScriptPerc': scriptPercs
    })


def analyzeBytecodeStats(stats_df, config):
    """Bin scripts by their 'BytecodeLength' column.

    Args:
        stats_df: Data frame with a 'BytecodeLength' column, one row per script.
        config: Object whose `bytecodeBinFactor` attribute is the bin width
            as a multiple of 1000.

    Returns:
        A pandas DataFrame with the columns 'BytecodeLength' (bin label),
        'ScriptCount' and 'ScriptPerc'.
    """
    return _binScriptCounts(stats_df, 'BytecodeLength', config.bytecodeBinFactor)


def analyzeBlockStats(stats_df, config, field):
    """Bin scripts by the block-count column named `field`.

    Args:
        stats_df: Data frame with a column named `field`, one row per script.
        config: Object whose `blockBinFactor` attribute is the bin width as a
            multiple of 1000.
        field: Column to bin, e.g. 'BlockPreOptCount' or 'BlockPostOptCount'.

    Returns:
        A pandas DataFrame with the columns `field` (bin label),
        'ScriptCount' and 'ScriptPerc'.
    """
    return _binScriptCounts(stats_df, field, config.blockBinFactor)


def analyzeMaxBlockInstructionStats(stats_df, config):
    """Bin scripts by their 'MaxBlockInstructionCount' column.

    Args:
        stats_df: Data frame with a 'MaxBlockInstructionCount' column, one
            row per script.
        config: Object whose `blockInstructionBinFactor` attribute is the bin
            width as a multiple of 1000.

    Returns:
        A pandas DataFrame with the columns 'MaxBlockInstructionCount' (bin
        label), 'ScriptCount' and 'ScriptPerc'.
    """
    return _binScriptCounts(stats_df, 'MaxBlockInstructionCount', config.blockInstructionBinFactor)

if __name__ == '__main__':
    # Script entry point: parse the options, load the statistics file and
    # print one markdown table per analysis to standard output.
    config = getArgs()
    stats_df = readStats(config.statsFile)

    # Bytecode length distribution comes first.
    print(analyzeBytecodeStats(stats_df, config).to_markdown())

    # Block count distributions, pre-optimization then post-optimization.
    for blockField in ('BlockPreOptCount', 'BlockPostOptCount'):
        print(analyzeBlockStats(stats_df, config, blockField).to_markdown())

    # Largest per-block instruction count distribution comes last.
    print(analyzeMaxBlockInstructionStats(stats_df, config).to_markdown())
Sync to upstream/release/600 (#1076) ### What's Changed - Improve readability of unions and intersections by limiting the number of elements of those types that can be presented on a single line (gated under `FFlag::LuauToStringSimpleCompositeTypesSingleLine`) - Adds a new option to the compiler `--record-stats` to record and output compilation statistics - `if...then...else` expressions are now optimized into `AND/OR` form when possible. ### VM - Add a new `buffer` type to Luau based on the [buffer RFC](https://github.com/Roblox/luau/pull/739) and additional C API functions to work with it; this release does not include the library. - Internal C API to work with string buffers has been updated to align with Lua version more closely ### Native Codegen - Added support for new X64 instruction (rev) and new A64 instruction (bswap) in the assembler - Simplified the way numerical loop condition is translated to IR ### New Type Solver - Operator inference now handled by type families - Created a new system called `Type Paths` to explain why subtyping tests fail in order to improve the quality of error messages. 
- Systematic changes to implement Data Flow analysis in the new solver (`Breadcrumb` removed and replaced with `RefinementKey`) --- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vighnesh Vijay <vvijay@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> 2023-10-21 09:10:30 +08:00			`#!/usr/bin/python3`
			`# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details`

			`import argparse`
			`import json`
			`from collections import Counter`
			`import pandas as pd`
			`## needed for 'to_markdown' method for pandas data frame`
			`import tabulate`


			`def getArgs():`
			`parser = argparse.ArgumentParser(description='Analyze compiler statistics')`
			`parser.add_argument('--bytecode-bin-factor', dest='bytecodeBinFactor',default=10,help='Bytecode bin size as a multiple of 1000 (10 by default)')`
			`parser.add_argument('--block-bin-factor', dest='blockBinFactor',default=1,help='Block bin size as a multiple of 1000 (1 by default)')`
			`parser.add_argument('--block-instruction-bin-factor', dest='blockInstructionBinFactor',default=1,help='Block bin size as a multiple of 1000 (1 by default)')`
			`parser.add_argument('statsFile', help='stats.json file generated by running luau-compile')`
			`args = parser.parse_args()`
			`return args`

			`def readStats(statsFile):`
			`with open(statsFile) as f:`
			`stats = json.load(f)`

			`scripts = []`
			`functionCounts = []`
			`bytecodeLengths = []`
			`blockPreOptCounts = []`
			`blockPostOptCounts = []`
			`maxBlockInstructionCounts = []`

			`for path, fileStat in stats.items():`
			`scripts.append(path)`
			`functionCounts.append(fileStat['lowerStats']['totalFunctions'] - fileStat['lowerStats']['skippedFunctions'])`
			`bytecodeLengths.append(fileStat['bytecode'])`
			`blockPreOptCounts.append(fileStat['lowerStats']['blocksPreOpt'])`
			`blockPostOptCounts.append(fileStat['lowerStats']['blocksPostOpt'])`
			`maxBlockInstructionCounts.append(fileStat['lowerStats']['maxBlockInstructions'])`

			`stats_df = pd.DataFrame({`
			`'Script': scripts,`
			`'FunctionCount': functionCounts,`
			`'BytecodeLength': bytecodeLengths,`
			`'BlockPreOptCount': blockPreOptCounts,`
			`'BlockPostOptCount': blockPostOptCounts,`
			`'MaxBlockInstructionCount': maxBlockInstructionCounts`
			`})`

			`return stats_df`


			`def analyzeBytecodeStats(stats_df, config):`
			`binFactor = config.bytecodeBinFactor`
			`divisor = binFactor * 1000`
			`totalScriptCount = len(stats_df.index)`

			`lengthLabels = []`
			`scriptCounts = []`
			`scriptPercs = []`

			`counter = Counter()`

			`for index, row in stats_df.iterrows():`
			`value = row['BytecodeLength']`
			`factor = int(value / divisor)`
			`counter[factor] += 1`

			`for factor, scriptCount in sorted(counter.items()):`
			`left = factor * binFactor`
			`right = left + binFactor`
			`lengthLabel = '{left}K-{right}K'.format(left=left, right=right)`
			`lengthLabels.append(lengthLabel)`
			`scriptCounts.append(scriptCount)`
			`scriptPerc = round(scriptCount * 100 / totalScriptCount, 1)`
			`scriptPercs.append(scriptPerc)`

			`bcode_df = pd.DataFrame({`
			`'BytecodeLength': lengthLabels,`
			`'ScriptCount': scriptCounts,`
			`'ScriptPerc': scriptPercs`
			`})`

			`return bcode_df`


			`def analyzeBlockStats(stats_df, config, field):`
			`binFactor = config.blockBinFactor`
			`divisor = binFactor * 1000`
			`totalScriptCount = len(stats_df.index)`

			`blockLabels = []`
			`scriptCounts = []`
			`scriptPercs = []`

			`counter = Counter()`

			`for index, row in stats_df.iterrows():`
			`value = row[field]`
			`factor = int(value / divisor)`
			`counter[factor] += 1`

			`for factor, scriptCount in sorted(counter.items()):`
			`left = factor * binFactor`
			`right = left + binFactor`
			`blockLabel = '{left}K-{right}K'.format(left=left, right=right)`
			`blockLabels.append(blockLabel)`
			`scriptCounts.append(scriptCount)`
			`scriptPerc = round((scriptCount * 100) / totalScriptCount, 1)`
			`scriptPercs.append(scriptPerc)`

			`block_df = pd.DataFrame({`
			`field: blockLabels,`
			`'ScriptCount': scriptCounts,`
			`'ScriptPerc': scriptPercs`
			`})`

			`return block_df`

			`def analyzeMaxBlockInstructionStats(stats_df, config):`
			`binFactor = config.blockInstructionBinFactor`
			`divisor = binFactor * 1000`
			`totalScriptCount = len(stats_df.index)`

			`blockLabels = []`
			`scriptCounts = []`
			`scriptPercs = []`

			`counter = Counter()`

			`for index, row in stats_df.iterrows():`
			`value = row['MaxBlockInstructionCount']`
			`factor = int(value / divisor)`
			`counter[factor] += 1`

			`for factor, scriptCount in sorted(counter.items()):`
			`left = factor * binFactor`
			`right = left + binFactor`
			`blockLabel = '{left}K-{right}K'.format(left=left, right=right)`
			`blockLabels.append(blockLabel)`
			`scriptCounts.append(scriptCount)`
			`scriptPerc = round((scriptCount * 100) / totalScriptCount, 1)`
			`scriptPercs.append(scriptPerc)`

			`block_df = pd.DataFrame({`
			`'MaxBlockInstructionCount': blockLabels,`
			`'ScriptCount': scriptCounts,`
			`'ScriptPerc': scriptPercs`
			`})`

			`return block_df`

			`if __name__ == '__main__':`
			`config = getArgs()`

			`stats_df = readStats(config.statsFile)`

			`bcode_df = analyzeBytecodeStats(stats_df, config)`
			`print(bcode_df.to_markdown())`

			`block_df = analyzeBlockStats(stats_df, config, 'BlockPreOptCount')`
			`print(block_df.to_markdown())`

			`block_df = analyzeBlockStats(stats_df, config, 'BlockPostOptCount')`
			`print(block_df.to_markdown())`

			`block_df = analyzeMaxBlockInstructionStats(stats_df, config)`
			`print(block_df.to_markdown())`