debug information for compiled opcodes implemented

This commit is contained in:
Thakee Nathees 2021-02-17 23:57:24 +05:30
parent d7495235aa
commit 869d87256c
9 changed files with 510 additions and 83 deletions

View File

@ -1,16 +1,25 @@
[] Runtime error trace.
[] VM's debug informations.
[] Stack reallocation.
[] Garbage collection.
[] VM's script cache (imported).
[] Map literal.
[] Hex, binary literals and floats like ".5".
[] Relative file import.
[] Compilte core methods.
[] Complete var methods.
[] Complete core functions.
[] Complete builtin operators.
[] Complete opcodes.
[] Union tagging alter in var.
[] Std libs.
// To implement.
[ ] Runtime error trace.
[ ] VM's debug information.
[*] Compiled opcode dump.
[ ] Stack trace.
[ ] Stack reallocation.
[ ] Garbage collection.
[ ] VM's script cache (imported).
[ ] Map literal.
[ ] Hex, binary literals and floats like ".5".
[ ] Relative file import.
[ ] Complete core methods.
[ ] Complete var methods.
[ ] Complete core functions.
[ ] Complete builtin operators.
[ ] Complete opcodes.
[ ] Union tagging alter in var.
[ ] Std libs.
// Bugs.
[ ] `function() "do" <body> end` make 'do' optional.

View File

@ -37,37 +37,8 @@
#define MS_PUBLIC
#endif
#define STRINGIFY(x) TOSTRING(x)
#define TOSTRING(x) #x
// The factor by which a buffer will grow when it's capacity reached.
#define GROW_FACTOR 2
// The initial capacity of a buffer.
#define MIN_CAPACITY 8
// Unique number to identify for various cases.
typedef uint32_t ID;
// Nan-Tagging could be disable for debugging/portability purposes.
// To disable define `VAR_NAN_TAGGING 0`, otherwise it defaults to Nan-Tagging.
#ifndef VAR_NAN_TAGGING
#define VAR_NAN_TAGGING 1
#endif
#if VAR_NAN_TAGGING
typedef uint64_t Var;
#else
typedef struct Var Var;
#endif
typedef struct Object Object;
typedef struct String String;
typedef struct List List;
typedef struct Range Range;
typedef struct Script Script;
typedef struct Function Function;
// Set this to dump the compiled opcodes of each function.
#define DEBUG_DUMP_COMPILED_CODE 1
#ifdef DEBUG
@ -89,6 +60,9 @@ typedef struct Function Function;
} \
} while (false)
// Assert that [index] lies in the half open range [0, size). Both arguments
// are wrapped in parentheses so that compound expressions (for example
// `a & b` or `n + 1`) keep their meaning once substituted into the condition.
// Note that [index] is evaluated twice, so avoid arguments with side effects.
#define ASSERT_INDEX(index, size) \
  ASSERT((index) >= 0 && (index) < (size), "Index out of bounds.")
#define UNREACHABLE() \
do { \
fprintf(stderr, "Execution reached an unreachable path\n" \
@ -116,6 +90,15 @@ typedef struct Function Function;
#define TODO ASSERT(false, "TODO")
#define OOPS "Oops a bug!! report plese."
#define STRINGIFY(x) TOSTRING(x)
#define TOSTRING(x) #x
// The factor by which a buffer will grow when its capacity is reached.
#define GROW_FACTOR 2
// The initial capacity of a buffer.
#define MIN_CAPACITY 8
// Allocate object of [type] using the vmRealloc function.
#define ALLOCATE(vm, type) \
((type*)vmRealloc(vm, NULL, 0, sizeof(type)))
@ -133,4 +116,28 @@ typedef struct Function Function;
#define DEALLOCATE(vm, pointer) \
vmRealloc(vm, pointer, 0, 0)
// Nan-Tagging could be disabled for debugging/portability purposes.
// To disable define `VAR_NAN_TAGGING 0`, otherwise it defaults to Nan-Tagging.
#ifndef VAR_NAN_TAGGING
#define VAR_NAN_TAGGING 1
#endif
#if VAR_NAN_TAGGING
typedef uint64_t Var;
#else
typedef struct Var Var;
#endif
typedef struct Object Object;
typedef struct String String;
typedef struct List List;
typedef struct Range Range;
typedef struct Script Script;
typedef struct Function Function;
// Unique number to identify for various cases.
typedef uint32_t ID;
#endif //MS_COMMON_H

View File

@ -14,6 +14,10 @@
#include "utils.h"
#include "vm.h"
#if DEBUG_DUMP_COMPILED_CODE
#include "debug.h"
#endif
// The maximum number of variables (or globals if compiling top level script)
// to lookup from the compiling context. Also it's limited by its opcode
// which is using a single byte value to identify the local.
@ -1001,16 +1005,15 @@ static void exprName(Compiler* compiler, bool can_assign) {
NameSearchResult result = compilerSearchName(compiler, name_start, name_len);
if (result.type == NAME_NOT_DEFINED) {
if (can_assign && match(parser, TK_EQ)) {
int index = compilerAddVariable(compiler, name_start, name_len,
name_line);
compileExpression(compiler);
emitStoreVariable(compiler, index, compiler->scope_depth == DEPTH_GLOBAL);
return;
} else {
parseError(parser, "Name \"%.*s\" is not defined.", name_len, name_start);
}
return;
}
switch (result.type) {
@ -1481,6 +1484,10 @@ static int compileFunction(Compiler* compiler, FuncType fn_type) {
emitOpcode(compiler, OP_END);
compilerExitBlock(compiler); // Parameter depth.
#if DEBUG_DUMP_COMPILED_CODE
dumpInstructions(compiler->vm, compiler->func->ptr);
#endif
compiler->func = compiler->func->outer_func;
return fn_index;
@ -1574,7 +1581,7 @@ static void compileWhileStatement(Compiler* compiler) {
compileExpression(compiler); //< Condition.
emitOpcode(compiler, OP_JUMP_IF_NOT);
int whilepatch = emitByte(compiler, 0xffff); //< Will be patched.
int whilepatch = emitShort(compiler, 0xffff); //< Will be patched.
compileBlockBody(compiler, BLOCK_LOOP);
@ -1767,6 +1774,10 @@ Script* compileSource(MSVM* vm, const char* path) {
vm->compiler = NULL;
#if DEBUG_DUMP_COMPILED_CODE
dumpInstructions(vm, script->body);
#endif
if (compiler.parser.has_errors) return NULL;
return script;
}

View File

@ -8,6 +8,7 @@
#include <math.h>
#include <time.h>
#include "var.h"
#include "vm.h"
typedef struct {
@ -26,9 +27,9 @@ typedef struct {
_BuiltinFn builtins[BUILTIN_COUNT];
static void initializeBuiltinFN(MSVM* vm, _BuiltinFn* bfn, const char* name,
int arity, MiniScriptNativeFn ptr) {
int length, int arity, MiniScriptNativeFn ptr) {
bfn->name = name;
bfn->length = (name != NULL) ? (int)strlen(name) : 0;
bfn->length = length;
varInitObject(&bfn->fn._super, vm, OBJ_FUNC);
bfn->fn.name = name;
@ -121,6 +122,11 @@ Function* getBuiltinFunction(int index) {
return &builtins[index].fn;
}
// Returns the name of the builtin function stored at [index] in the builtins
// table. [index] must be in the range [0, BUILTIN_COUNT); both ends are
// asserted because a negative index would read before the start of the table.
// The returned pointer is the `name` field of the entry as it was stored.
const char* getBuiltinFunctionName(int index) {
  ASSERT(index >= 0 && index < BUILTIN_COUNT, "Index out of bound.");
  return builtins[index].name;
}
#define FN_IS_PRIMITE_TYPE(name, check) \
void coreIs##name(MSVM* vm) { \
RET(VAR_BOOL(check(ARG(1)))); \
@ -215,25 +221,28 @@ void initializeCore(MSVM* vm) {
int i = 0; //< Iterate through builtins.
#define INITALIZE_BUILTIN_FN(name, fn, argc) \
initializeBuiltinFN(vm, &builtins[i++], name, (int)strlen(name), argc, fn);
// Initialize builtin functions.
initializeBuiltinFN(vm, &builtins[i++], "is_null", 1, coreIsNull);
initializeBuiltinFN(vm, &builtins[i++], "is_bool", 1, coreIsBool);
initializeBuiltinFN(vm, &builtins[i++], "is_num", 1, coreIsNum);
INITALIZE_BUILTIN_FN("is_null", coreIsNull, 1);
INITALIZE_BUILTIN_FN("is_bool", coreIsBool, 1);
INITALIZE_BUILTIN_FN("is_num", coreIsNum, 1);
initializeBuiltinFN(vm, &builtins[i++], "is_string", 1, coreIsString);
initializeBuiltinFN(vm, &builtins[i++], "is_list", 1, coreIsList);
initializeBuiltinFN(vm, &builtins[i++], "is_map", 1, coreIsMap);
initializeBuiltinFN(vm, &builtins[i++], "is_range", 1, coreIsRange);
initializeBuiltinFN(vm, &builtins[i++], "is_function", 1, coreIsFunction);
initializeBuiltinFN(vm, &builtins[i++], "is_script", 1, coreIsScript);
initializeBuiltinFN(vm, &builtins[i++], "is_userobj", 1, coreIsUserObj);
INITALIZE_BUILTIN_FN("is_string", coreIsString, 1);
INITALIZE_BUILTIN_FN("is_list", coreIsList, 1);
INITALIZE_BUILTIN_FN("is_map", coreIsMap, 1);
INITALIZE_BUILTIN_FN("is_range", coreIsRange, 1);
INITALIZE_BUILTIN_FN("is_function", coreIsFunction, 1);
INITALIZE_BUILTIN_FN("is_script", coreIsScript, 1);
INITALIZE_BUILTIN_FN("is_userobj", coreIsUserObj, 1);
initializeBuiltinFN(vm, &builtins[i++], "to_string", 1, coreToString);
initializeBuiltinFN(vm, &builtins[i++], "print", -1, corePrint);
initializeBuiltinFN(vm, &builtins[i++], "import", 1, coreImport);
INITALIZE_BUILTIN_FN("to_string", coreToString, 1);
INITALIZE_BUILTIN_FN("print", corePrint, -1);
INITALIZE_BUILTIN_FN("import", coreImport, 1);
// Sentinel to mark the end of the array.
initializeBuiltinFN(vm, &builtins[i], NULL, 0, NULL);
initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL);
// Make STD scripts.
Script* std; // A temporary pointer to the current std script.
@ -348,7 +357,16 @@ bool varLesser(MSVM* vm, Var v1, Var v2) {
return false;
}
// A convenient macro used in varGetAttrib and varSetAttrib. It evaluates to
// true when the string referenced by the local variable `attrib` has the same
// length and the same characters as the C string [name]. A variable called
// `attrib` with `length` and `data` members must be in scope where it expands.
#define IS_ATTRIB(name) \
  (strlen(name) == attrib->length && strcmp(attrib->data, name) == 0)
// Sets a runtime error on the VM saying that the type of `on` has no attribute
// with the name held in `attrib`. The macro takes no arguments: it reads the
// variables `vm`, `on` and `attrib` from the scope in which it is expanded.
// NOTE(review): the expansion already ends with a semicolon, so writing
// `ERR_NO_ATTRIB();` produces an extra empty statement. That is harmless inside
// braces but would break an unbraced `if ... else` -- keep call sites braced.
#define ERR_NO_ATTRIB() \
msSetRuntimeError(vm, "'%s' objects has no attribute named '%s'", \
varTypeName(on), attrib->data);
Var varGetAttrib(MSVM* vm, Var on, String* attrib) {
if (!IS_OBJ(on)) {
msSetRuntimeError(vm, "%s type is not subscriptable.", varTypeName(on));
return VAR_NULL;
@ -357,7 +375,29 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) {
Object* obj = (Object*)AS_OBJ(on);
switch (obj->type) {
case OBJ_STRING:
{
if (IS_ATTRIB("length")) {
size_t length = ((String*)obj)->length;
return VAR_NUM((double)length);
} else {
ERR_NO_ATTRIB();
return VAR_NULL;
}
UNREACHABLE();
}
case OBJ_LIST:
{
if (IS_ATTRIB("length")) {
size_t length = ((List*)obj)->elements.count;
return VAR_NUM((double)length);
} else {
ERR_NO_ATTRIB();
return VAR_NULL;
}
UNREACHABLE();
}
case OBJ_MAP:
case OBJ_RANGE:
TODO;
@ -388,8 +428,73 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) {
return VAR_NULL;
}
void varSetAttrib(MSVM* vm, Var on, String* name, Var value) {
TODO;
// Handles an assignment to the attribute [attrib] of the value [on]. No
// attribute is actually written by this function yet: every path either sets
// a runtime error on [vm] (value is not an object, attribute is immutable, or
// the attribute doesn't exist) or hits a TODO. [value] is currently unused.
void varSetAttrib(MSVM* vm, Var on, String* attrib, Var value) {

// If the requested attribute is [prop], report it as immutable and leave the
// enclosing function; otherwise do nothing.
#define ATTRIB_IMMUTABLE(prop)                                      \
  do {                                                              \
    if (IS_ATTRIB(prop)) {                                          \
      msSetRuntimeError(vm, "'%s' attribute is immutable.", prop);  \
      return;                                                       \
    }                                                               \
  } while (false)

  // Only heap objects can have attributes.
  if (!IS_OBJ(on)) {
    msSetRuntimeError(vm, "%s type is not subscriptable.", varTypeName(on));
    return;
  }

  Object* target = (Object*)AS_OBJ(on);
  switch (target->type) {

    // Strings and lists only know the read only attribute "length".
    case OBJ_STRING:
    case OBJ_LIST:
      ATTRIB_IMMUTABLE("length");
      ERR_NO_ATTRIB();
      return;

    case OBJ_MAP:
      TODO;
      ERR_NO_ATTRIB();
      return;

    case OBJ_SCRIPT: {
      Script* script = (Script*)target;

      // TODO: check globals HERE.

      // Look the attribute up among the script's functions.
      int fn_index = nameTableFind(&script->function_names, attrib->data,
                                   attrib->length);
      if (fn_index == -1) {
        ERR_NO_ATTRIB();
        return;
      }

      // A function with that name exists, report it as immutable.
      ASSERT_INDEX(fn_index, script->functions.count);
      ATTRIB_IMMUTABLE(script->functions.data[fn_index]->name);
      return;
    }

    // These types have no attributes handled here.
    case OBJ_RANGE:
    case OBJ_FUNC:
    case OBJ_USER:
      ERR_NO_ATTRIB();
      return;

    default:
      UNREACHABLE();
  }

  UNREACHABLE();
}
Var varGetSubscript(MSVM* vm, Var on, Var key) {
@ -400,7 +505,19 @@ Var varGetSubscript(MSVM* vm, Var on, Var key) {
Object* obj = AS_OBJ(on);
switch (obj->type) {
case OBJ_STRING: TODO;
case OBJ_STRING:
{
int32_t index;
String* str = ((String*)obj);
if (!validateIngeger(vm, key, &index, "List index")) {
return VAR_NULL;
}
if (!validateIndex(vm, index, str->length, "String")) {
return VAR_NULL;
}
String* c = newString(vm, str->data + index, 1);
return VAR_OBJ(c);
}
case OBJ_LIST:
{
@ -496,9 +613,15 @@ bool varIterate(MSVM* vm, Var seq, Var* iterator, Var* value) {
switch (obj->type) {
case OBJ_STRING: {
TODO; // Need to consider utf8.
TODO; // Return string[index].
// TODO: Need to consider utf8.
String* str = ((String*)obj);
if (iter < 0 || iter >= str->length) {
return false; //< Stop iteration.
}
// TODO: Or I could add char as a type for efficiency.
*value = VAR_OBJ(newString(vm, str->data + iter, 1));
*iterator = VAR_NUM((double)iter + 1);
return true;
}
case OBJ_LIST: {

View File

@ -15,8 +15,12 @@ void initializeCore(MSVM* vm);
// if not found returns -1.
int findBuiltinFunction(const char* name, int length);
// Returns the builtin function at index [index].
Function* getBuiltinFunction(int index);
// Returns the builtin function's name at index [index].
const char* getBuiltinFunctionName(int index);
// Operators //////////////////////////////////////////////////////////////////
Var varAdd(MSVM* vm, Var v1, Var v2);

250
src/debug.c Normal file
View File

@ -0,0 +1,250 @@
/*
* Copyright (c) 2021 Thakee Nathees
* Licensed under: MIT License
*/
#include "core.h"
#include "debug.h"
#include "vm.h"
// Table of opcode names used when dumping instructions. It is generated by
// including "opcodes.h" with OPCODE() temporarily defined to stringify its
// first argument, so entry N holds the name given by the N-th OPCODE() line of
// that header. The [params] and [stack] arguments are ignored here. A NULL
// entry is appended after the last name.
static const char* op_name[] = {
#define OPCODE(name, params, stack) #name,
#include "opcodes.h"
#undef OPCODE
NULL,
};
// Prints a textual representation of [value] to stdout without a trailing
// new line. null, booleans and numbers are printed directly, every other value
// must be an object and is printed according to its object type.
//
// [recursive] is true when the value is being printed as an element of a
// list: in that case a list is abbreviated to "[...]" instead of printing its
// elements, which bounds the output for nested (or self containing) lists.
static void _dumpValue(MSVM* vm, Var value, bool recursive) {
  if (IS_NULL(value)) {
    printf("null");
    return;
  }
  if (IS_BOOL(value)) {
    // Use an explicit "%s" so the format string is always a literal.
    printf("%s", (AS_BOOL(value)) ? "true" : "false");
    return;
  }
  if (IS_NUM(value)) {
    printf("%.14g", AS_NUM(value));
    return;
  }

  ASSERT(IS_OBJ(value), OOPS);
  Object* obj = AS_OBJ(value);

  switch (obj->type) {
    case OBJ_STRING:
      printf("\"%s\"", ((String*)obj)->data);
      return;

    case OBJ_LIST:
    {
      List* list = ((List*)obj);
      if (recursive) {
        printf("[...]");
      } else {
        printf("[");
        for (int i = 0; i < list->elements.count; i++) {
          if (i != 0) printf(", ");
          // Elements are dumped in "recursive" mode, see the header comment.
          _dumpValue(vm, list->elements.data[i], true);
        }
        printf("]");
      }
      return;
    }

    case OBJ_MAP:
      TODO;
      return;

    case OBJ_RANGE:
    {
      Range* range = ((Range*)obj);
      printf("%.2g..%.2g", range->from, range->to);
      // Without this return the script/function/user representations below
      // would be printed as well.
      return;
    }

    // "%p" requires a pointer to void, hence the explicit casts.
    case OBJ_SCRIPT:
      printf("[Script:%p]", (void*)obj);
      return;

    case OBJ_FUNC:
      printf("[Fn:%p]", (void*)obj);
      return;

    case OBJ_USER:
      printf("[UserObj:%p]", (void*)obj);
      return;
  }
}
// Public entry point for printing [value] to stdout (no trailing new line).
// The value is treated as a top level value, i.e. not as a list element.
void dumpValue(MSVM* vm, Var value) {
  const bool inside_list = false;
  _dumpValue(vm, value, inside_list);
}
// Prints a listing of the bytecode of [func] to stdout, one instruction per
// row. Each row shows the source line (only when it differs from the line of
// the previous instruction), the byte offset of the instruction, the opcode
// name and its operand, if any. The cases below are grouped by the operand
// layout of the opcodes.
void dumpInstructions(MSVM* vm, Function* func) {

  uint8_t* code = func->fn->opcodes.data;
  int* oplines = func->fn->oplines.data;
  int ip = 0;         //< Offset of the next byte to decode.
  int shown_line = 0; //< Source line that was printed most recently.

  printf("Instruction Dump of function '%s'\n", func->name);

// Operand readers: both advance [ip] past the bytes they consume. A short is
// stored with its high byte first.
#define READ_BYTE() (code[ip++])
#define READ_SHORT() (ip += 2, code[ip - 2] << 8 | code[ip - 1])

#define NO_ARGS() printf("\n")
#define SHORT_ARG() printf("%5d\n", READ_SHORT())

#define INDENTATION " "

  while (ip < func->fn->opcodes.count) {
    ASSERT_INDEX(ip, func->fn->opcodes.count);

    // Source line column.
    const int current_line = oplines[ip];
    if (current_line == shown_line) {
      printf(INDENTATION " ");
    } else {
      printf(INDENTATION "%4d:", current_line);
      shown_line = current_line;
    }

    // Offset and mnemonic of the instruction.
    printf(INDENTATION "%4d %-16s", ip, op_name[code[ip]]);

    Opcode op = (Opcode)code[ip++];
    switch (op) {

      // Two byte literal index, followed by the literal itself.
      case OP_CONSTANT:
      {
        int index = READ_SHORT();
        printf("%5d ", index);
        ASSERT_INDEX(index, func->owner->literals.count);
        dumpValue(vm, func->owner->literals.data[index]);
        printf("\n");
        break;
      }

      // Two byte builtin index, followed by the builtin's name.
      case OP_PUSH_BUILTIN_FN:
      {
        int index = READ_SHORT();
        printf("%5d [Fn:%s]\n", index, getBuiltinFunctionName(index));
        break;
      }

      case OP_CALL:
        printf("%5d (argc)\n", READ_SHORT());
        break;

      // Forward jumps: the target is relative to the end of the operand.
      case OP_ITER:
      case OP_JUMP:
      case OP_JUMP_IF:
      case OP_JUMP_IF_NOT:
      {
        int offset = READ_SHORT();
        printf("%5d (ip:%d)\n", offset, ip + offset);
        break;
      }

      // Backward jump: the offset is shown negated.
      case OP_LOOP:
      {
        int offset = READ_SHORT();
        printf("%5d (ip:%d)\n", -offset, ip - offset);
        break;
      }

      // Opcodes dumped with a single two byte operand.
      // NOTE(review): the *_LOCAL_N operands are read as shorts here; confirm
      // against opcodes.h that they are not encoded as a single byte.
      case OP_PUSH_LIST:
      case OP_PUSH_LOCAL_N:
      case OP_STORE_LOCAL_N:
      case OP_PUSH_GLOBAL:
      case OP_STORE_GLOBAL:
      case OP_PUSH_FN:
      case OP_GET_ATTRIB:
      case OP_GET_ATTRIB_AOP:
      case OP_SET_ATTRIB:
        SHORT_ARG();
        break;

      // Opcodes dumped without any operand.
      case OP_PUSH_NULL:
      case OP_PUSH_SELF:
      case OP_PUSH_TRUE:
      case OP_PUSH_FALSE:
      case OP_LIST_APPEND:
      case OP_PUSH_LOCAL_0:
      case OP_PUSH_LOCAL_1:
      case OP_PUSH_LOCAL_2:
      case OP_PUSH_LOCAL_3:
      case OP_PUSH_LOCAL_4:
      case OP_PUSH_LOCAL_5:
      case OP_PUSH_LOCAL_6:
      case OP_PUSH_LOCAL_7:
      case OP_PUSH_LOCAL_8:
      case OP_STORE_LOCAL_0:
      case OP_STORE_LOCAL_1:
      case OP_STORE_LOCAL_2:
      case OP_STORE_LOCAL_3:
      case OP_STORE_LOCAL_4:
      case OP_STORE_LOCAL_5:
      case OP_STORE_LOCAL_6:
      case OP_STORE_LOCAL_7:
      case OP_STORE_LOCAL_8:
      case OP_POP:
      case OP_RETURN:
      case OP_GET_SUBSCRIPT:
      case OP_GET_SUBSCRIPT_AOP:
      case OP_SET_SUBSCRIPT:
      case OP_NEGATIVE:
      case OP_NOT:
      case OP_BIT_NOT:
      case OP_ADD:
      case OP_SUBTRACT:
      case OP_MULTIPLY:
      case OP_DIVIDE:
      case OP_MOD:
      case OP_BIT_AND:
      case OP_BIT_OR:
      case OP_BIT_XOR:
      case OP_BIT_LSHIFT:
      case OP_BIT_RSHIFT:
      case OP_AND:
      case OP_OR:
      case OP_EQEQ:
      case OP_NOTEQ:
      case OP_LT:
      case OP_LTEQ:
      case OP_GT:
      case OP_GTEQ:
      case OP_RANGE:
      case OP_IN:
      case OP_END:
        NO_ARGS();
        break;

      default:
        UNREACHABLE();
        break;
    }
  }
}

18
src/debug.h Normal file
View File

@ -0,0 +1,18 @@
/*
* Copyright (c) 2021 Thakee Nathees
* Licensed under: MIT License
*/
#ifndef DEBUG_H
#define DEBUG_H
#include "common.h"
#include "miniscript.h"
// Dump the value of the [value] without a new line at the end.
void dumpValue(MSVM* vm, Var value);
// Dump opcodes of the given function.
void dumpInstructions(MSVM* vm, Function* func);
#endif // DEBUG_H

View File

@ -90,19 +90,19 @@ OPCODE(POP, 0, -1)
// done the stack top should be stored otherwise it'll be disregarded. The
// function should set the 0 th argument to return value. Locals at 0 to 8
// marked explicitly since it's performance critical.
// params: CALL_0..8 -> 2 bytes index. _N -> 2 bytes index and 2 bytes count.
// params: n bytes argc.
// TODO: may be later.
//OPCODE(CALL_0, 2, 0) //< Push null call null will be the return value.
//OPCODE(CALL_1, 2, -1) //< Push null and arg1. arg1 will be popped.
//OPCODE(CALL_2, 2, -2) //< And so on.
//OPCODE(CALL_3, 2, -3)
//OPCODE(CALL_4, 2, -4)
//OPCODE(CALL_5, 2, -5)
//OPCODE(CALL_6, 2, -6)
//OPCODE(CALL_7, 2, -7)
//OPCODE(CALL_8, 2, -8)
OPCODE(CALL, 4, -0) //< Will calculated at compile time.
//OPCODE(CALL_0, 0, 0) //< Push null call null will be the return value.
//OPCODE(CALL_1, 0, -1) //< Push null and arg1. arg1 will be popped.
//OPCODE(CALL_2, 0, -2) //< And so on.
//OPCODE(CALL_3, 0, -3)
//OPCODE(CALL_4, 0, -4)
//OPCODE(CALL_5, 0, -5)
//OPCODE(CALL_6, 0, -6)
//OPCODE(CALL_7, 0, -7)
//OPCODE(CALL_8, 0, -8)
OPCODE(CALL, 2, -0) //< Will calculated at compile time.
// The stack top will be iteration value, next one is iterator (integer) and
// next would be the container. It'll update those values but not push or pop

View File

@ -6,6 +6,7 @@
#include "vm.h"
#include "core.h"
#include "debug.h"
#include "utils.h"
#define HAS_ERROR() (vm->error != NULL)
@ -229,8 +230,12 @@ MSInterpretResult vmRunScript(MSVM* vm, Script* _script) {
SWITCH(instruction) {
OPCODE(CONSTANT):
PUSH(script->literals.data[READ_SHORT()]);
{
int index = READ_SHORT();
ASSERT_INDEX(index, script->literals.count);
PUSH(script->literals.data[index]);
DISPATCH();
}
OPCODE(PUSH_NULL):
PUSH(VAR_NULL);