diff --git a/TODO.txt b/TODO.txt index 3f73228..c536dcc 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,16 +1,25 @@ -[] Runtime error trace. -[] VM's debug informations. -[] Stack reallocation. -[] Garbage collection. -[] VM's script cache (imported). -[] Map literal. -[] Hex, binary literals and floats like ".5". -[] Relative file import. -[] Compilte core methods. -[] Complete var methods. -[] Complete core functions. -[] Complete builtin operators. -[] Complete opcodes. -[] Union tagging alter in var. -[] Std libs. +// To implement. + +[ ] Runtime error trace. +[ ] VM's debug informations. + [*] Compiled opcode dump. + [ ] Stack trace. +[ ] Stack reallocation. +[ ] Garbage collection. +[ ] VM's script cache (imported). +[ ] Map literal. +[ ] Hex, binary literals and floats like ".5". +[ ] Relative file import. +[ ] Compilte core methods. +[ ] Complete var methods. +[ ] Complete core functions. +[ ] Complete builtin operators. +[ ] Complete opcodes. +[ ] Union tagging alter in var. +[ ] Std libs. + + +// Bugs. + +[ ] `function() "do" end` make 'do' optional. diff --git a/src/common.h b/src/common.h index 6364c01..8f5277b 100644 --- a/src/common.h +++ b/src/common.h @@ -37,37 +37,8 @@ #define MS_PUBLIC #endif -#define STRINGIFY(x) TOSTRING(x) -#define TOSTRING(x) #x - -// The factor by which a buffer will grow when it's capacity reached. -#define GROW_FACTOR 2 - -// The initial capacity of a buffer. -#define MIN_CAPACITY 8 - -// Unique number to identify for various cases. -typedef uint32_t ID; - -// Nan-Tagging could be disable for debugging/portability purposes. -// To disable define `VAR_NAN_TAGGING 0`, otherwise it defaults to Nan-Tagging. -#ifndef VAR_NAN_TAGGING - #define VAR_NAN_TAGGING 1 -#endif - -#if VAR_NAN_TAGGING -typedef uint64_t Var; -#else -typedef struct Var Var; -#endif - -typedef struct Object Object; -typedef struct String String; -typedef struct List List; -typedef struct Range Range; - -typedef struct Script Script; -typedef struct Function Function; +// Set this to dump compiled opcodes of each functions. +#define DEBUG_DUMP_COMPILED_CODE 1 #ifdef DEBUG @@ -89,6 +60,9 @@ typedef struct Function Function; } \ } while (false) +#define ASSERT_INDEX(index, size) \ + ASSERT(index >= 0 && index < size, "Index out of bounds.") + #define UNREACHABLE() \ do { \ fprintf(stderr, "Execution reached an unreachable path\n" \ @@ -116,6 +90,15 @@ typedef struct Function Function; #define TODO ASSERT(false, "TODO") #define OOPS "Oops a bug!! report plese." +#define STRINGIFY(x) TOSTRING(x) +#define TOSTRING(x) #x + +// The factor by which a buffer will grow when it's capacity reached. +#define GROW_FACTOR 2 + +// The initial capacity of a buffer. +#define MIN_CAPACITY 8 + // Allocate object of [type] using the vmRealloc function. #define ALLOCATE(vm, type) \ ((type*)vmRealloc(vm, NULL, 0, sizeof(type))) @@ -133,4 +116,28 @@ typedef struct Function Function; #define DEALLOCATE(vm, pointer) \ vmRealloc(vm, pointer, 0, 0) + +// Nan-Tagging could be disable for debugging/portability purposes. +// To disable define `VAR_NAN_TAGGING 0`, otherwise it defaults to Nan-Tagging. +#ifndef VAR_NAN_TAGGING + #define VAR_NAN_TAGGING 1 +#endif + +#if VAR_NAN_TAGGING +typedef uint64_t Var; +#else +typedef struct Var Var; +#endif + +typedef struct Object Object; +typedef struct String String; +typedef struct List List; +typedef struct Range Range; + +typedef struct Script Script; +typedef struct Function Function; + +// Unique number to identify for various cases. +typedef uint32_t ID; + #endif //MS_COMMON_H diff --git a/src/compiler.c b/src/compiler.c index 5a82201..004eaf0 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -14,6 +14,10 @@ #include "utils.h" #include "vm.h" +#if DEBUG_DUMP_COMPILED_CODE + #include "debug.h" +#endif + // The maximum number of variables (or global if compiling top level script) // to lookup from the compiling context. Also it's limited by it's opcode // which is using a single byte value to identify the local. @@ -1001,16 +1005,15 @@ static void exprName(Compiler* compiler, bool can_assign) { NameSearchResult result = compilerSearchName(compiler, name_start, name_len); if (result.type == NAME_NOT_DEFINED) { - if (can_assign && match(parser, TK_EQ)) { int index = compilerAddVariable(compiler, name_start, name_len, name_line); compileExpression(compiler); emitStoreVariable(compiler, index, compiler->scope_depth == DEPTH_GLOBAL); - return; } else { parseError(parser, "Name \"%.*s\" is not defined.", name_len, name_start); } + return; } switch (result.type) { @@ -1481,6 +1484,10 @@ static int compileFunction(Compiler* compiler, FuncType fn_type) { emitOpcode(compiler, OP_END); compilerExitBlock(compiler); // Parameter depth. + +#if DEBUG_DUMP_COMPILED_CODE + dumpInstructions(compiler->vm, compiler->func->ptr); +#endif compiler->func = compiler->func->outer_func; return fn_index; @@ -1574,7 +1581,7 @@ static void compileWhileStatement(Compiler* compiler) { compileExpression(compiler); //< Condition. emitOpcode(compiler, OP_JUMP_IF_NOT); - int whilepatch = emitByte(compiler, 0xffff); //< Will be patched. + int whilepatch = emitShort(compiler, 0xffff); //< Will be patched. compileBlockBody(compiler, BLOCK_LOOP); @@ -1767,6 +1774,10 @@ Script* compileSource(MSVM* vm, const char* path) { vm->compiler = NULL; +#if DEBUG_DUMP_COMPILED_CODE + dumpInstructions(vm, script->body); +#endif + if (compiler.parser.has_errors) return NULL; return script; } diff --git a/src/core.c b/src/core.c index 967be8f..5273b80 100644 --- a/src/core.c +++ b/src/core.c @@ -8,6 +8,7 @@ #include #include +#include "var.h" #include "vm.h" typedef struct { @@ -26,9 +27,9 @@ typedef struct { _BuiltinFn builtins[BUILTIN_COUNT]; static void initializeBuiltinFN(MSVM* vm, _BuiltinFn* bfn, const char* name, - int arity, MiniScriptNativeFn ptr) { + int length, int arity, MiniScriptNativeFn ptr) { bfn->name = name; - bfn->length = (name != NULL) ? (int)strlen(name) : 0; + bfn->length = length; varInitObject(&bfn->fn._super, vm, OBJ_FUNC); bfn->fn.name = name; @@ -121,6 +122,11 @@ Function* getBuiltinFunction(int index) { return &builtins[index].fn; } +const char* getBuiltinFunctionName(int index) { + ASSERT(index < BUILTIN_COUNT, "Index out of bound."); + return builtins[index].name; +} + #define FN_IS_PRIMITE_TYPE(name, check) \ void coreIs##name(MSVM* vm) { \ RET(VAR_BOOL(check(ARG(1)))); \ @@ -215,25 +221,28 @@ void initializeCore(MSVM* vm) { int i = 0; //< Iterate through builtins. +#define INITALIZE_BUILTIN_FN(name, fn, argc) \ + initializeBuiltinFN(vm, &builtins[i++], name, (int)strlen(name), argc, fn); + // Initialize builtin functions. - initializeBuiltinFN(vm, &builtins[i++], "is_null", 1, coreIsNull); - initializeBuiltinFN(vm, &builtins[i++], "is_bool", 1, coreIsBool); - initializeBuiltinFN(vm, &builtins[i++], "is_num", 1, coreIsNum); + INITALIZE_BUILTIN_FN("is_null", coreIsNull, 1); + INITALIZE_BUILTIN_FN("is_bool", coreIsBool, 1); + INITALIZE_BUILTIN_FN("is_num", coreIsNum, 1); - initializeBuiltinFN(vm, &builtins[i++], "is_string", 1, coreIsString); - initializeBuiltinFN(vm, &builtins[i++], "is_list", 1, coreIsList); - initializeBuiltinFN(vm, &builtins[i++], "is_map", 1, coreIsMap); - initializeBuiltinFN(vm, &builtins[i++], "is_range", 1, coreIsRange); - initializeBuiltinFN(vm, &builtins[i++], "is_function", 1, coreIsFunction); - initializeBuiltinFN(vm, &builtins[i++], "is_script", 1, coreIsScript); - initializeBuiltinFN(vm, &builtins[i++], "is_userobj", 1, coreIsUserObj); + INITALIZE_BUILTIN_FN("is_string", coreIsString, 1); + INITALIZE_BUILTIN_FN("is_list", coreIsList, 1); + INITALIZE_BUILTIN_FN("is_map", coreIsMap, 1); + INITALIZE_BUILTIN_FN("is_range", coreIsRange, 1); + INITALIZE_BUILTIN_FN("is_function", coreIsFunction, 1); + INITALIZE_BUILTIN_FN("is_script", coreIsScript, 1); + INITALIZE_BUILTIN_FN("is_userobj", coreIsUserObj, 1); - initializeBuiltinFN(vm, &builtins[i++], "to_string", 1, coreToString); - initializeBuiltinFN(vm, &builtins[i++], "print", -1, corePrint); - initializeBuiltinFN(vm, &builtins[i++], "import", 1, coreImport); + INITALIZE_BUILTIN_FN("to_string", coreToString, 1); + INITALIZE_BUILTIN_FN("print", corePrint, -1); + INITALIZE_BUILTIN_FN("import", coreImport, 1); // Sentinal to mark the end of the array. - initializeBuiltinFN(vm, &builtins[i], NULL, 0, NULL); + initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL); // Make STD scripts. Script* std; // A temporary pointer to the current std script. @@ -348,7 +357,16 @@ bool varLesser(MSVM* vm, Var v1, Var v2) { return false; } +// A convinent convenient macro used in varGetAttrib and varSetAttrib. +#define IS_ATTRIB(name) \ + (attrib->length == strlen(name) && strcmp(name, attrib->data) == 0) + +#define ERR_NO_ATTRIB() \ + msSetRuntimeError(vm, "'%s' objects has no attribute named '%s'", \ + varTypeName(on), attrib->data); + Var varGetAttrib(MSVM* vm, Var on, String* attrib) { + if (!IS_OBJ(on)) { msSetRuntimeError(vm, "%s type is not subscriptable.", varTypeName(on)); return VAR_NULL; @@ -357,7 +375,29 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) { Object* obj = (Object*)AS_OBJ(on); switch (obj->type) { case OBJ_STRING: + { + if (IS_ATTRIB("length")) { + size_t length = ((String*)obj)->length; + return VAR_NUM((double)length); + } else { + ERR_NO_ATTRIB(); + return VAR_NULL; + } + UNREACHABLE(); + } + case OBJ_LIST: + { + if (IS_ATTRIB("length")) { + size_t length = ((List*)obj)->elements.count; + return VAR_NUM((double)length); + } else { + ERR_NO_ATTRIB(); + return VAR_NULL; + } + UNREACHABLE(); + } + case OBJ_MAP: case OBJ_RANGE: TODO; @@ -388,8 +428,73 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) { return VAR_NULL; } -void varSetAttrib(MSVM* vm, Var on, String* name, Var value) { - TODO; +void varSetAttrib(MSVM* vm, Var on, String* attrib, Var value) { + +#define ATTRIB_IMMUTABLE(prop) \ +do { \ + if (IS_ATTRIB(prop)) { \ + msSetRuntimeError(vm, "'%s' attribute is immutable.", prop); \ + return; \ + } \ +} while (false) + + if (!IS_OBJ(on)) { + msSetRuntimeError(vm, "%s type is not subscriptable.", varTypeName(on)); + return; + } + + Object* obj = (Object*)AS_OBJ(on); + switch (obj->type) { + case OBJ_STRING: + ATTRIB_IMMUTABLE("length"); + ERR_NO_ATTRIB(); + return; + + case OBJ_LIST: + ATTRIB_IMMUTABLE("length"); + ERR_NO_ATTRIB(); + return; + + case OBJ_MAP: + TODO; + ERR_NO_ATTRIB(); + return; + + case OBJ_RANGE: + ERR_NO_ATTRIB(); + return; + + case OBJ_SCRIPT: { + Script* scr = (Script*)obj; + + // TODO: check globals HERE. + + // Check function. + int index = nameTableFind(&scr->function_names, attrib->data, + attrib->length); + if (index != -1) { + ASSERT_INDEX(index, scr->functions.count); + ATTRIB_IMMUTABLE(scr->functions.data[index]->name); + return; + } + + ERR_NO_ATTRIB(); + return; + } + + case OBJ_FUNC: + ERR_NO_ATTRIB(); + return; + + case OBJ_USER: + ERR_NO_ATTRIB(); + return; + + default: + UNREACHABLE(); + } + + UNREACHABLE(); } Var varGetSubscript(MSVM* vm, Var on, Var key) { @@ -400,7 +505,19 @@ Var varGetSubscript(MSVM* vm, Var on, Var key) { Object* obj = AS_OBJ(on); switch (obj->type) { - case OBJ_STRING: TODO; + case OBJ_STRING: + { + int32_t index; + String* str = ((String*)obj); + if (!validateIngeger(vm, key, &index, "List index")) { + return VAR_NULL; + } + if (!validateIndex(vm, index, str->length, "String")) { + return VAR_NULL; + } + String* c = newString(vm, str->data + index, 1); + return VAR_OBJ(c); + } case OBJ_LIST: { @@ -496,9 +613,15 @@ bool varIterate(MSVM* vm, Var seq, Var* iterator, Var* value) { switch (obj->type) { case OBJ_STRING: { - TODO; // Need to consider utf8. - - TODO; // Return string[index]. + // TODO: // Need to consider utf8. + String* str = ((String*)obj); + if (iter < 0 || iter >= str->length) { + return false; //< Stop iteration. + } + // TODO: Or I could add char as a type for efficiency. + *value = VAR_OBJ(newString(vm, str->data + iter, 1)); + *iterator = VAR_NUM((double)iter + 1); + return true; } case OBJ_LIST: { diff --git a/src/core.h b/src/core.h index a2503b8..0e6fd4b 100644 --- a/src/core.h +++ b/src/core.h @@ -15,8 +15,12 @@ void initializeCore(MSVM* vm); // if not found returns -1. int findBuiltinFunction(const char* name, int length); +// Returns the builtin function at index [index]. Function* getBuiltinFunction(int index); +// Returns the builtin function's name at index [index]. +const char* getBuiltinFunctionName(int index); + // Operators ////////////////////////////////////////////////////////////////// Var varAdd(MSVM* vm, Var v1, Var v2); diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 0000000..db15c66 --- /dev/null +++ b/src/debug.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#include "core.h" +#include "debug.h" +#include "vm.h" + +static const char* op_name[] = { + #define OPCODE(name, params, stack) #name, + #include "opcodes.h" + #undef OPCODE + NULL, +}; + +static void _dumpValue(MSVM* vm, Var value, bool recursive) { + if (IS_NULL(value)) { + printf("null"); + return; + } + if (IS_BOOL(value)) { + printf((AS_BOOL(value)) ? "true" : "false"); + return; + } + if (IS_NUM(value)) { + printf("%.14g", AS_NUM(value)); + return; + } + ASSERT(IS_OBJ(value), OOPS); + Object* obj = AS_OBJ(value); + switch (obj->type) { + case OBJ_STRING: + printf("\"%s\"", ((String*)obj)->data); + return; + case OBJ_LIST: + { + List* list = ((List*)obj); + if (recursive) { + printf("[...]"); + } else { + printf("["); + for (int i = 0; i < list->elements.count; i++) { + if (i != 0) printf(", "); + _dumpValue(vm, list->elements.data[i], true); + } + printf("]"); + } + return; + } + + case OBJ_MAP: + TODO; + return; + + case OBJ_RANGE: + { + Range* range = ((Range*)obj); + printf("%.2g..%.2g", range->from, range->to); + } + + case OBJ_SCRIPT: + printf("[Script:%p]", obj); + case OBJ_FUNC: + printf("[Fn:%p]", obj); + case OBJ_USER: + printf("[UserObj:%p]", obj); + } +} + +void dumpValue(MSVM* vm, Var value) { + _dumpValue(vm, value, false); +} + +void dumpInstructions(MSVM* vm, Function* func) { + + + int i = 0; + uint8_t* opcodes = func->fn->opcodes.data; + int* lines = func->fn->oplines.data; + int line = 1, last_line = 0; + + printf("Instruction Dump of function '%s'\n", func->name); +#define READ_BYTE() (opcodes[i++]) +#define READ_SHORT() (i += 2, opcodes[i - 2] << 8 | opcodes[i-1]) + +#define NO_ARGS() printf("\n") +#define SHORT_ARG() printf("%5d\n", READ_SHORT()) +#define INDENTATION " " + + while (i < func->fn->opcodes.count) { + ASSERT_INDEX(i, func->fn->opcodes.count); + + // Print the line number. + line = lines[i]; + if (line != last_line) { + printf(INDENTATION "%4d:", line); + last_line = line; + } else { + printf(INDENTATION " "); + } + + printf(INDENTATION "%4d %-16s", i, op_name[opcodes[i]]); + + Opcode op = (Opcode)func->fn->opcodes.data[i++]; + switch (op) { + case OP_CONSTANT: + { + int index = READ_SHORT(); + printf("%5d ", index); + ASSERT_INDEX(index, func->owner->literals.count); + Var value = func->owner->literals.data[index]; + dumpValue(vm, value); + printf("\n"); + break; + } + + case OP_PUSH_NULL: + case OP_PUSH_SELF: + case OP_PUSH_TRUE: + case OP_PUSH_FALSE: + NO_ARGS(); + break; + + case OP_PUSH_LIST: + SHORT_ARG(); + break; + + case OP_LIST_APPEND: NO_ARGS(); break; + + case OP_PUSH_LOCAL_0: + case OP_PUSH_LOCAL_1: + case OP_PUSH_LOCAL_2: + case OP_PUSH_LOCAL_3: + case OP_PUSH_LOCAL_4: + case OP_PUSH_LOCAL_5: + case OP_PUSH_LOCAL_6: + case OP_PUSH_LOCAL_7: + case OP_PUSH_LOCAL_8: + NO_ARGS(); + break; + + case OP_PUSH_LOCAL_N: + SHORT_ARG(); + break; + + case OP_STORE_LOCAL_0: + case OP_STORE_LOCAL_1: + case OP_STORE_LOCAL_2: + case OP_STORE_LOCAL_3: + case OP_STORE_LOCAL_4: + case OP_STORE_LOCAL_5: + case OP_STORE_LOCAL_6: + case OP_STORE_LOCAL_7: + case OP_STORE_LOCAL_8: + NO_ARGS(); + break; + + case OP_STORE_LOCAL_N: + SHORT_ARG(); + break; + + case OP_PUSH_GLOBAL: + case OP_STORE_GLOBAL: + case OP_PUSH_FN: + SHORT_ARG(); + break; + + case OP_PUSH_BUILTIN_FN: + { + int index = READ_SHORT(); + printf("%5d [Fn:%s]\n", index, getBuiltinFunctionName(index)); + break; + } + + + case OP_POP: + NO_ARGS(); + break; + + case OP_CALL: + printf("%5d (argc)\n", READ_SHORT()); + break; + + case OP_ITER: + case OP_JUMP: + case OP_JUMP_IF: + case OP_JUMP_IF_NOT: + { + int offset = READ_SHORT(); + printf("%5d (ip:%d)\n", offset, i + offset); + break; + } + + case OP_LOOP: + { + int offset = READ_SHORT(); + printf("%5d (ip:%d)\n", -offset, i - offset); + break; + } + + case OP_RETURN: NO_ARGS(); break; + + case OP_GET_ATTRIB: + case OP_GET_ATTRIB_AOP: + case OP_SET_ATTRIB: + SHORT_ARG(); + break; + + case OP_GET_SUBSCRIPT: + case OP_GET_SUBSCRIPT_AOP: + case OP_SET_SUBSCRIPT: + NO_ARGS(); + break; + + case OP_NEGATIVE: + case OP_NOT: + case OP_BIT_NOT: + case OP_ADD: + case OP_SUBTRACT: + case OP_MULTIPLY: + case OP_DIVIDE: + case OP_MOD: + case OP_BIT_AND: + case OP_BIT_OR: + case OP_BIT_XOR: + case OP_BIT_LSHIFT: + case OP_BIT_RSHIFT: + case OP_AND: + case OP_OR: + case OP_EQEQ: + case OP_NOTEQ: + case OP_LT: + case OP_LTEQ: + case OP_GT: + case OP_GTEQ: + case OP_RANGE: + case OP_IN: + case OP_END: + NO_ARGS(); + break; + + default: + UNREACHABLE(); + break; + } + } +} + + diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 0000000..bddc83a --- /dev/null +++ b/src/debug.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef DEBUG_H +#define DEBUG_H + +#include "common.h" +#include "miniscript.h" + +// Dump the value of the [value] without a new line at the end. +void dumpValue(MSVM* vm, Var value); + +// Dump opcodes of the given function. +void dumpInstructions(MSVM* vm, Function* func); + +#endif // DEBUG_H diff --git a/src/opcodes.h b/src/opcodes.h index 84067be..9fd5665 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -90,19 +90,19 @@ OPCODE(POP, 0, -1) // done the stack top should be stored otherwise it'll be disregarded. The // function should set the 0 th argment to return value. Locals at 0 to 8 // marked explicitly since it's performance criticle. -// params: CALL_0..8 -> 2 bytes index. _N -> 2 bytes index and 2 bytes count. +// params: n bytes argc. // TODO: may be later. -//OPCODE(CALL_0, 2, 0) //< Push null call null will be the return value. -//OPCODE(CALL_1, 2, -1) //< Push null and arg1. arg1 will be popped. -//OPCODE(CALL_2, 2, -2) //< And so on. -//OPCODE(CALL_3, 2, -3) -//OPCODE(CALL_4, 2, -4) -//OPCODE(CALL_5, 2, -5) -//OPCODE(CALL_6, 2, -6) -//OPCODE(CALL_7, 2, -7) -//OPCODE(CALL_8, 2, -8) -OPCODE(CALL, 4, -0) //< Will calculated at compile time. +//OPCODE(CALL_0, 0, 0) //< Push null call null will be the return value. +//OPCODE(CALL_1, 0, -1) //< Push null and arg1. arg1 will be popped. +//OPCODE(CALL_2, 0, -2) //< And so on. +//OPCODE(CALL_3, 0, -3) +//OPCODE(CALL_4, 0, -4) +//OPCODE(CALL_5, 0, -5) +//OPCODE(CALL_6, 0, -6) +//OPCODE(CALL_7, 0, -7) +//OPCODE(CALL_8, 0, -8) +OPCODE(CALL, 2, -0) //< Will calculated at compile time. // The stack top will be iteration value, next one is iterator (integer) and // next would be the container. It'll update those values but not push or pop diff --git a/src/vm.c b/src/vm.c index 7e820ec..bd229b4 100644 --- a/src/vm.c +++ b/src/vm.c @@ -6,6 +6,7 @@ #include "vm.h" #include "core.h" +#include "debug.h" #include "utils.h" #define HAS_ERROR() (vm->error != NULL) @@ -229,8 +230,12 @@ MSInterpretResult vmRunScript(MSVM* vm, Script* _script) { SWITCH(instruction) { OPCODE(CONSTANT): - PUSH(script->literals.data[READ_SHORT()]); + { + int index = READ_SHORT(); + ASSERT_INDEX(index, script->literals.count); + PUSH(script->literals.data[index]); DISPATCH(); + } OPCODE(PUSH_NULL): PUSH(VAR_NULL);