From 34716b4f6bc93cc64118515d1dfd3581e6313e6c Mon Sep 17 00:00:00 2001 From: Thakee Nathees Date: Sun, 25 Apr 2021 20:49:39 +0530 Subject: [PATCH 1/2] garbage collection implementations. --- include/miniscript.h | 20 +++++++++++++++++ ms_configure.py | 1 + src/core.c | 5 ++--- src/core.h | 8 +++++++ src/types/name_table.c | 2 ++ src/vm.c | 50 +++++++++++++++++++++++++++++++++++++----- src/vm.h | 21 ++++++++++++++---- test/main.c | 3 +++ 8 files changed, 98 insertions(+), 12 deletions(-) diff --git a/include/miniscript.h b/include/miniscript.h index 0f22deb..c7c7075 100644 --- a/include/miniscript.h +++ b/include/miniscript.h @@ -40,6 +40,18 @@ typedef struct MSVM MSVM; typedef struct Var Var; #endif +// A function that'll be called for all the allocation calls by MSVM. +// +// - To allocate new memory it'll pass NULL to parameter [memory] and the +// required size to [new_size]. On failure the return value would be NULL. +// +// - When reallocating an existing memory if it's grow in place the return +// address would be the same as [memory] otherwise a new address. +// +// - To free an allocated memory pass [memory] and 0 to [new_size]. The +// function will return NULL. +typedef void* (*MiniScriptReallocFn)(void* memory, size_t new_size, void* user_data); + // C function pointer which is callable from MiniScript. typedef void (*MiniScriptNativeFn)(MSVM* vm); @@ -84,6 +96,10 @@ typedef void (*MiniScriptLoadScriptDoneFn) (MSVM* vm, const char* path, typedef struct { + // The callback used to allocate, reallocate, and free. If the function + // pointer is NULL it defaults to the VM's realloc(), free() wrappers. + MiniScriptReallocFn realloc_fn; + MiniScriptErrorFn error_fn; MiniScriptWriteFn write_fn; @@ -95,6 +111,10 @@ typedef struct { } MSConfiguration; +// Initialize the configuration and set ALL of it's values to the defaults. +// Call this before setting any particular field of it. 
+void MSInitConfiguration(MSConfiguration* config); + typedef enum { RESULT_SUCCESS = 0, RESULT_COMPILE_ERROR, diff --git a/ms_configure.py b/ms_configure.py index 9afa253..6cbd4e9 100644 --- a/ms_configure.py +++ b/ms_configure.py @@ -40,6 +40,7 @@ def generate_files(): import buffergen ec = buffergen.gen() + ## .bat files are just for quick rebuild of the buffer templates in windows. if sys.platform == 'win32': with open('src/types/gen.bat', 'w') as f: f.write('python buffergen.py') diff --git a/src/core.c b/src/core.c index 959cf15..51993db 100644 --- a/src/core.c +++ b/src/core.c @@ -268,7 +268,6 @@ void initializeCore(MSVM* vm) { STD_NEW_SCRIPT("std:list"); STD_ADD_FUNCTION("sort", stdListSort, 1); - // std:os script. STD_NEW_SCRIPT("std:os"); STD_ADD_FUNCTION("clock", stdOsClock, 0); @@ -408,7 +407,7 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) { int index = nameTableFind(&scr->function_names, attrib->data, attrib->length); if (index != -1) { - // TODO: Assert index (not a runtime error). + ASSERT_INDEX(index, scr->functions.count); return VAR_OBJ(scr->functions.data[index]); } @@ -492,7 +491,7 @@ do { \ return; case OBJ_USER: - ERR_NO_ATTRIB(); + TODO; //ERR_NO_ATTRIB(); return; default: diff --git a/src/core.h b/src/core.h index fdbec1b..1809e3d 100644 --- a/src/core.h +++ b/src/core.h @@ -9,6 +9,14 @@ #include "var.h" #include "common.h" +// Initialize core language, builtin function and "std" scripts. +// Note (TODO: refactore required): +// Since the builtin function doesn't require any allocation they're +// elements of a static `builtins` array but the "std" scripts are `Script` +// objects they required memory management and they're bound with the VM. +// It contradicts `initializeCore()` to be called for each VM or only once. +// 1. Make the scripts share between VMs. +// 2. Destroy scripts buffer only when the last VM die. 
void initializeCore(MSVM* vm); // Find the builtin function name and returns it's index in the builtins array diff --git a/src/types/name_table.c b/src/types/name_table.c index 6d65132..4b7b823 100644 --- a/src/types/name_table.c +++ b/src/types/name_table.c @@ -17,6 +17,8 @@ void nameTableClear(NameTable* self, MSVM* vm) { int nameTableAdd(NameTable* self, MSVM* vm, const char* name, size_t length, String** ptr) { + // Note: Since stringBuffer won't copy the string we don't have to free the + // below string, it'll be managed by the string buffer. String* string = newString(vm, name, (uint32_t)length); vmPushTempRef(vm, &string->_super); diff --git a/src/vm.c b/src/vm.c index b3a4f4e..b908daa 100644 --- a/src/vm.c +++ b/src/vm.c @@ -17,6 +17,14 @@ // Minimum size of the stack. #define MIN_STACK_SIZE 128 +static void* defaultRealloc(void* memory, size_t new_size, void* user_data) { + if (new_size == 0) { + free(memory); + return NULL; + } + return realloc(memory, new_size); +} + Fiber* newFiber(MSVM* vm) { Fiber* fiber = ALLOCATE(vm, Fiber); memset(fiber, 0, sizeof(Fiber)); @@ -26,33 +34,43 @@ Fiber* newFiber(MSVM* vm) { void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) { + // TODO: Debug trace allocations here. + // Track the total allocated memory of the VM to trigger the GC. // if vmRealloc is called for freeing the old_size would be 0 since // deallocated bytes are traced by garbage collector. 
self->bytes_allocated += new_size - old_size; - // TODO: If vm->bytes_allocated > some_value -> GC(); + if (new_size > 0 && self->bytes_allocated > self->next_gc) { + vmCollectGarbage(self); + } if (new_size == 0) { free(memory); return NULL; } - return realloc(memory, new_size); + return self->config.realloc_fn(memory, new_size, self->config.user_data); } void vmInit(MSVM* self, MSConfiguration* config) { memset(self, 0, sizeof(MSVM)); self->config = *config; + self->gray_list_count = 0; + self->gray_list_capacity = 8; // TODO: refactor the magic '8' here. + self->gray_list = (Object**)self->config.realloc_fn( + NULL, sizeof(Object*) * self->gray_list_capacity, NULL); + self->next_gc = 1024 * 1024 * 10; // TODO: + // TODO: no need to initialize if already done by another vm. initializeCore(self); } void vmPushTempRef(MSVM* self, Object* obj) { ASSERT(obj != NULL, "Cannot reference to NULL."); - ASSERT(self->temp_reference_count < MAX_TEMP_REFERENCE, - "Too many temp references"); + ASSERT(self->temp_reference_count < MAX_TEMP_REFERENCE, + "Too many temp references"); self->temp_reference[self->temp_reference_count++] = obj; } @@ -61,6 +79,15 @@ void vmPopTempRef(MSVM* self) { self->temp_reference_count--; } +void vmCollectGarbage(MSVM* self) { + + // Reset VM's bytes_allocated value and count it again so that we don't + // required to know the size of each object that'll be freeing. + self->bytes_allocated = 0; + + TODO; +} + void vmAddStdScript(MSVM* self, Script* script) { ASSERT(self->std_count < MAX_SCRIPT_CACHE, OOPS); self->std_scripts[self->std_count++] = script; @@ -127,6 +154,18 @@ void vmReportError(MSVM* vm) { ASSERT(false, "TODO: create debug.h"); } +void MSInitConfiguration(MSConfiguration* config) { + config->realloc_fn = defaultRealloc; + + // TODO: Handle Null functions before calling them. 
+ config->error_fn = NULL; + config->write_fn = NULL; + + config->load_script_fn = NULL; + config->load_script_done_fn = NULL; + config->user_data = NULL; +} + MSVM* msNewVM(MSConfiguration* config) { MSVM* vm = (MSVM*)malloc(sizeof(MSVM)); vmInit(vm, config); @@ -137,7 +176,8 @@ MSInterpretResult msInterpret(MSVM* vm, const char* file) { Script* script = compileSource(vm, file); if (script == NULL) return RESULT_COMPILE_ERROR; - // TODO: Check if scripts size is enough. + // TODO: The below assertion should be an error report. + ASSERT(vm->script_count + 1 < MAX_SCRIPT_CACHE, "Scripts cache out of bound."); vm->scripts[vm->script_count++] = script; return vmRunScript(vm, script); } diff --git a/src/vm.h b/src/vm.h index 565c6cc..48e0c10 100644 --- a/src/vm.h +++ b/src/vm.h @@ -36,15 +36,15 @@ struct Fiber { // body function). Function* func; - // The stack of the execution holding locals and temps. A heap allocated - // Will and grow as needed. + // The stack of the execution holding locals and temps. A heap will be + // allocated and grow as needed. Var* stack; // The stack pointer (%rsp) pointing to the stack top. Var* sp; // The stack base pointer of the current frame. It'll be updated before - // calling a native function. + // calling a native function. (`fiber->ret` === `curr_call_frame->rbp`). Var* ret; // Size of the allocated stack. @@ -68,8 +68,18 @@ struct MSVM { // The first object in the link list of all heap allocated objects. Object* first; + // The number of bytes allocated by the vm and not (yet) garbage collected. size_t bytes_allocated; + // The number of bytes that'll trigger the next GC. + size_t next_gc; + + // In the tri coloring scheme gray is the working list. We recursively pop + // from the list color it balck and add it's referenced objects to gray_list. + Object** gray_list; + int gray_list_count; + int gray_list_capacity; + // A stack of temporary object references to ensure that the object // doesn't garbage collected. 
Object* temp_reference[MAX_TEMP_REFERENCE]; @@ -81,7 +91,7 @@ struct MSVM { // Current compiler reference to mark it's heap allocated objects. Compiler* compiler; - // Std scripts array. + // Std scripts array. (TODO: assert "std" scripts doesn't have global vars). Script* std_scripts[MAX_SCRIPT_CACHE]; // Std scripts count. @@ -122,6 +132,9 @@ void vmPushTempRef(MSVM* self, Object* obj); // Pop the top most object from temporary reference stack. void vmPopTempRef(MSVM* self); +// Trigger garbage collection manually. +void vmCollectGarbage(MSVM* self); + // Add a std script to vm when initializing core. void vmAddStdScript(MSVM* self, Script* script); diff --git a/test/main.c b/test/main.c index f1bb148..38785cb 100644 --- a/test/main.c +++ b/test/main.c @@ -18,6 +18,8 @@ void writeFunction(MSVM* vm, const char* text) { } void loadScriptDone(MSVM* vm, const char* path, void* user_data) { + // User data is the allocated source code buffer and it has to be freed + // manually since it wasn't allocated by the VM. 
free(user_data); } @@ -66,6 +68,7 @@ int main(int argc, char** argv) { const char* source_path = argv[1]; MSConfiguration config; + MSInitConfiguration(&config); config.error_fn = errorPrint; config.write_fn = writeFunction; config.load_script_fn = loadScript; From bd4642be703cc0c12ea4f0dbd0d4bdd6f8e83e6e Mon Sep 17 00:00:00 2001 From: Thakee Nathees Date: Mon, 26 Apr 2021 15:04:30 +0530 Subject: [PATCH 2/2] early root marking implemented --- include/miniscript.h | 5 +- src/compiler.c | 24 ++++++--- src/compiler.h | 2 + src/core.c | 75 +++++++++++++++++++-------- src/core.h | 3 ++ src/var.c | 118 +++++++++++++++++++++++++++++++++++++++---- src/var.h | 62 ++++++++++++++++++++++- src/vm.c | 105 +++++++++++++++++++++++++------------- src/vm.h | 61 +++------------------- test/main.c | 5 +- 10 files changed, 328 insertions(+), 132 deletions(-) diff --git a/include/miniscript.h b/include/miniscript.h index c7c7075..fc3e171 100644 --- a/include/miniscript.h +++ b/include/miniscript.h @@ -113,7 +113,7 @@ typedef struct { // Initialize the configuration and set ALL of it's values to the defaults. // Call this before setting any particular field of it. -void MSInitConfiguration(MSConfiguration* config); +void msInitConfiguration(MSConfiguration* config); typedef enum { RESULT_SUCCESS = 0, @@ -124,6 +124,9 @@ typedef enum { // Allocate initialize and returns a new VM MSVM* msNewVM(MSConfiguration* config); +// Clean the VM and dispose all the resources allocated by the VM. +void msFreeVM(MSVM* vm); + // Compile and execut file at given path. 
MSInterpretResult msInterpret(MSVM* vm, const char* file); diff --git a/src/compiler.c b/src/compiler.c index b5d305d..25b3c81 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -173,7 +173,7 @@ static _Keyword _keywords[] = { { "continue", 8, TK_CONTINUE }, { "return", 6, TK_RETURN }, - { NULL, (TokenType)(0) }, // Sentinal to mark the end of the array + { NULL, 0, (TokenType)(0) }, // Sentinel to mark the end of the array }; typedef struct { @@ -195,7 +195,6 @@ typedef struct { // Precedence parsing references: // https://en.wikipedia.org/wiki/Shunting-yard_algorithm -// TODO: I should explicitly state wren-lang as a reference "globaly". typedef enum { PREC_NONE, @@ -299,9 +298,7 @@ struct Compiler { int var_count; //< Number of locals in [variables]. int global_count; //< Number of globals in [variables]. - int stack_size; //< Current size including locals ind temps. - - // TODO: compiler should mark Script* below not to be garbage collected. + int stack_size; //< Current size including locals and temps. Script* script; //< Current script. Loop* loop; //< Current loop. @@ -1035,7 +1032,8 @@ static void exprName(Compiler* compiler, bool can_assign) { if (can_assign && matchAssignment(parser)) { TokenType assignment = parser->previous.type; if (assignment != TK_EQ) { - emitPushVariable(compiler, result.index, result.type == NAME_GLOBAL_VAR); + emitPushVariable(compiler, result.index, + result.type == NAME_GLOBAL_VAR); compileExpression(compiler); switch (assignment) { @@ -1800,3 +1798,17 @@ Script* compileSource(MSVM* vm, const char* path) { if (compiler.parser.has_errors) return NULL; return script; } + +/////////////////////////////////////////////////////////////////////////////// + +void compilerMarkObjects(Compiler* compiler, MSVM* vm) { + + // Mark the script which is currently being compiled. + markObject(&compiler->script->_super, vm); + + // Mark the string literals (they haven't been added to the script's literal + // buffer yet). 
+ markValue(compiler->parser.current.value, vm); + markValue(compiler->parser.previous.value, vm); + markValue(compiler->parser.next.value, vm); +} diff --git a/src/compiler.h b/src/compiler.h index 19affa5..bab8440 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -13,4 +13,6 @@ typedef struct Compiler Compiler; Script* compileSource(MSVM* vm, const char* path); +void compilerMarkObjects(Compiler* compiler, MSVM* vm); + #endif // COMPILER_H diff --git a/src/core.c b/src/core.c index 51993db..ebf951a 100644 --- a/src/core.c +++ b/src/core.c @@ -14,7 +14,7 @@ typedef struct { const char* name; //< Name of the function. int length; //< Length of the name. - Function fn; //< Native function pointer. + Function* fn; //< Native function pointer. } _BuiltinFn; // Count of builtin function +1 for termination. @@ -24,26 +24,23 @@ typedef struct { #define _AS_INTEGER(var) (int32_t)trunc(AS_NUM(var)) // Array of all builtin functions. -_BuiltinFn builtins[BUILTIN_COUNT]; +static _BuiltinFn builtins[BUILTIN_COUNT]; +static int builtins_count = 0; static void initializeBuiltinFN(MSVM* vm, _BuiltinFn* bfn, const char* name, int length, int arity, MiniScriptNativeFn ptr) { bfn->name = name; bfn->length = length; - varInitObject(&bfn->fn._super, vm, OBJ_FUNC); - bfn->fn.name = name; - bfn->fn.arity = arity; - bfn->fn.owner = NULL; - bfn->fn.is_native = true; - bfn->fn.native = ptr; + bfn->fn = newFunction(vm, name, length, NULL, true); + bfn->fn->arity = arity; + bfn->fn->native = ptr; } int findBuiltinFunction(const char* name, int length) { - for (int i = 0; i < BUILTIN_COUNT; i++) { - if (builtins[i].name == NULL) return -1; - - if (length == builtins[i].length && strncmp(name, builtins[i].name, length) == 0) { + for (int i = 0; i < builtins_count; i++) { + if (length == builtins[i].length && + strncmp(name, builtins[i].name, length) == 0) { return i; } } @@ -118,12 +115,12 @@ static inline bool validateIndex(MSVM* vm, int32_t index, int32_t size, } while (false) Function* 
getBuiltinFunction(int index) { - ASSERT(index < BUILTIN_COUNT, "Index out of bound."); - return &builtins[index].fn; + ASSERT_INDEX(index, builtins_count); + return builtins[index].fn; } const char* getBuiltinFunctionName(int index) { - ASSERT(index < BUILTIN_COUNT, "Index out of bound."); + ASSERT_INDEX(index, builtins_count); return builtins[index].name; } @@ -219,16 +216,17 @@ void stdOsClock(MSVM* vm) { /*****************************************************************************/ void initializeCore(MSVM* vm) { - int i = 0; //< Iterate through builtins. + ASSERT(builtins_count == 0, "Initialize core only once."); -#define INITALIZE_BUILTIN_FN(name, fn, argc) \ - initializeBuiltinFN(vm, &builtins[i++], name, (int)strlen(name), argc, fn); +#define INITALIZE_BUILTIN_FN(name, fn, argc) \ + initializeBuiltinFN(vm, &builtins[builtins_count++], name, \ + (int)strlen(name), argc, fn); // Initialize builtin functions. INITALIZE_BUILTIN_FN("is_null", coreIsNull, 1); INITALIZE_BUILTIN_FN("is_bool", coreIsBool, 1); INITALIZE_BUILTIN_FN("is_num", coreIsNum, 1); - + INITALIZE_BUILTIN_FN("is_string", coreIsString, 1); INITALIZE_BUILTIN_FN("is_list", coreIsList, 1); INITALIZE_BUILTIN_FN("is_map", coreIsMap, 1); @@ -236,13 +234,13 @@ void initializeCore(MSVM* vm) { INITALIZE_BUILTIN_FN("is_function", coreIsFunction, 1); INITALIZE_BUILTIN_FN("is_script", coreIsScript, 1); INITALIZE_BUILTIN_FN("is_userobj", coreIsUserObj, 1); - + INITALIZE_BUILTIN_FN("to_string", coreToString, 1); INITALIZE_BUILTIN_FN("print", corePrint, -1); INITALIZE_BUILTIN_FN("import", coreImport, 1); // Sentinal to mark the end of the array. - initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL); + //initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL); // Make STD scripts. Script* std; // A temporary pointer to the current std script. 
@@ -273,6 +271,12 @@ void initializeCore(MSVM* vm) { STD_ADD_FUNCTION("clock", stdOsClock, 0); } +void markCoreObjects(MSVM* vm) { + for (int i = 0; i < builtins_count; i++) { + markObject(&builtins[i].fn->_super, vm); + } +} + /*****************************************************************************/ /* OPERATORS */ /*****************************************************************************/ @@ -287,7 +291,32 @@ Var varAdd(MSVM* vm, Var v1, Var v2) { return VAR_NULL; } - TODO; //string addition/ array addition etc. + if (IS_OBJ(v1) && IS_OBJ(v2)) { + Object *o1 = AS_OBJ(v1), *o2 = AS_OBJ(v2); + switch (o1->type) { + + case OBJ_STRING: + { + if (o2->type == OBJ_STRING) { + TODO; // Implement String.format('@@', s1, s2); + } + } break; + + case OBJ_LIST: + case OBJ_MAP: + case OBJ_RANGE: + case OBJ_SCRIPT: + case OBJ_FUNC: + case OBJ_FIBER: + case OBJ_USER: + TODO; + } + } + + + msSetRuntimeError(vm, "Unsupported operand types for operator '-' " + "%s and %s", varTypeName(v1), varTypeName(v2)); + return VAR_NULL; } @@ -300,6 +329,8 @@ Var varSubtract(MSVM* vm, Var v1, Var v2) { return VAR_NULL; } + TODO; // for user objects call vm.config.sub_userobj_sub(handles). + msSetRuntimeError(vm, "Unsupported operand types for operator '-' " "%s and %s", varTypeName(v1), varTypeName(v2)); diff --git a/src/core.h b/src/core.h index 1809e3d..9b8bee2 100644 --- a/src/core.h +++ b/src/core.h @@ -19,6 +19,9 @@ // 2. Destroy scripts buffer only when the last VM die. void initializeCore(MSVM* vm); +// Mark the heap allocated core object at the mark phase. +void markCoreObjects(MSVM* vm); + // Find the builtin function name and returns it's index in the builtins array // if not found returns -1. 
int findBuiltinFunction(const char* name, int length); diff --git a/src/var.c b/src/var.c index 721b393..d42aa42 100644 --- a/src/var.c +++ b/src/var.c @@ -40,9 +40,31 @@ const char* msAsString(MSVM* vm, Var value) { void varInitObject(Object* self, MSVM* vm, ObjectType type) { self->type = type; + self->is_marked = false; self->next = vm->first; vm->first = self; - // TODO: set isGray = false; +} + +void markObject(Object* self, MSVM* vm) { + if (self == NULL || self->is_marked) return; + self->is_marked = true; + + // Add the object to the VM's marked_list so that we can recursively mark + // its referenced objects later. + if (vm->marked_list_count >= vm->marked_list_capacity) { + vm->marked_list_capacity *= 2; + vm->marked_list = (Object**)vm->config.realloc_fn( + vm->marked_list, + vm->marked_list_capacity * sizeof(Object*), + vm->config.user_data); + } + + vm->marked_list[vm->marked_list_count++] = self; +} + +void markValue(Var self, MSVM* vm) { + if (!IS_OBJ(self)) return; + markObject(AS_OBJ(self), vm); } #if VAR_NAN_TAGGING @@ -134,17 +156,26 @@ Function* newFunction(MSVM* vm, const char* name, int length, Script* owner, Function* func = ALLOCATE(vm, Function); varInitObject(&func->_super, vm, OBJ_FUNC); - // Add the name in the script's function buffer. - String* name_ptr; - vmPushTempRef(vm, &func->_super); - functionBufferWrite(&owner->functions, vm, func); - nameTableAdd(&owner->function_names, vm, name, length, &name_ptr); - vmPopTempRef(vm); + if (owner == NULL) { + ASSERT(is_native, OOPS); + func->name = name; + func->owner = NULL; + func->is_native = is_native; + + } else { + // Add the name in the script's function buffer. + String* name_ptr; + vmPushTempRef(vm, &func->_super); + functionBufferWrite(&owner->functions, vm, func); + nameTableAdd(&owner->function_names, vm, name, length, &name_ptr); + vmPopTempRef(vm); + + func->name = name_ptr->data; + func->owner = owner; + func->arity = -2; // -1 means variadic args. 
+ func->is_native = is_native; + } - func->name = name_ptr->data; - func->owner = owner; - func->arity = -2; // -1 means variadic args. - func->is_native = is_native; if (is_native) { func->native = NULL; @@ -159,6 +190,71 @@ Function* newFunction(MSVM* vm, const char* name, int length, Script* owner, return func; } +Fiber* newFiber(MSVM* vm) { + Fiber* fiber = ALLOCATE(vm, Fiber); + memset(fiber, 0, sizeof(Fiber)); + varInitObject(&fiber->_super, vm, OBJ_FIBER); + return fiber; +} + +void freeObject(MSVM* vm, Object* obj) { + // TODO: Debug trace memory here. + + // First clean the object's references, but we're not recursively + // deallocating them because they're not marked and will be cleaned later. + // Example: List's `elements` is VarBuffer that contains a heap allocated + // array of `var*` which will be cleaned below but the actual `var` elements + // won't be freed here; instead they haven't been marked at all, and will be + // removed at the sweeping phase of the garbage collection. 
+ switch (obj->type) { + case OBJ_STRING: + break; + + case OBJ_LIST: + varBufferClear(&(((List*)obj)->elements), vm); + break; + + case OBJ_MAP: + TODO; + break; + + case OBJ_RANGE: + break; + + case OBJ_SCRIPT: { + Script* scr = (Script*)obj; + varBufferClear(&scr->globals, vm); + nameTableClear(&scr->global_names, vm); + varBufferClear(&scr->literals, vm); + functionBufferClear(&scr->functions, vm); + nameTableClear(&scr->function_names, vm); + stringBufferClear(&scr->names, vm); + + } break; + + case OBJ_FUNC: + { + Function* func = (Function*)obj; + if (!func->is_native) { + byteBufferClear(&func->fn->opcodes, vm); + intBufferClear(&func->fn->oplines, vm); + } + } break; + + case OBJ_FIBER: + { + Fiber* fiber = (Fiber*)obj; + DEALLOCATE(vm, fiber->stack); + DEALLOCATE(vm, fiber->frames); + } break; + + case OBJ_USER: + break; + } + + DEALLOCATE(vm, obj); +} + // Utility functions ////////////////////////////////////////////////////////// const char* varTypeName(Var v) { diff --git a/src/var.h b/src/var.h index a1efdc6..cf45393 100644 --- a/src/var.h +++ b/src/var.h @@ -136,7 +136,7 @@ // Check types. #define IS_CONST(value) ((value & _MASK_CONST) == _MASK_CONST) #define IS_NULL(value) ((value) == VAR_NULL) -#define IS_UNDEF(value) ((value) == VAR_UNDEF) +#define IS_UNDEF(value) ((value) == VAR_UNDEFINED) #define IS_FALSE(value) ((value) == VAR_FALSE) #define IS_TRUE(value) ((value) == VAR_TRUE) #define IS_BOOL(value) (IS_TRUE(value) || IS_FALSE(value)) @@ -210,6 +210,7 @@ typedef enum /* ObjectType */ { // Base struct for all heap allocated objects. struct Object { ObjectType type; //< Type of the object in \ref var_Object_Type. + bool is_marked; //< Marked when garbage collection's marking phase. //Class* is; //< The class the object IS. // No OOP in MS. Object* next; //< Next object in the heap allocated link list. @@ -280,10 +281,59 @@ struct Function { }; }; +typedef struct { + uint8_t* ip; //< Pointer to the next instruction byte code. 
+ Function* fn; //< Function of the frame. + Var* rbp; //< Stack base pointer. (%rbp) +} CallFrame; + +struct Fiber { + Object _super; + + // The root function of the fiber. (For script it'll be the script's implicit + // body function). + Function* func; + + // The stack of the execution holding locals and temps. A heap will be + // allocated and grow as needed. + Var* stack; + + // The stack pointer (%rsp) pointing to the stack top. + Var* sp; + + // The stack base pointer of the current frame. It'll be updated before + // calling a native function. (`fiber->ret` === `curr_call_frame->rbp`). + Var* ret; + + // Size of the allocated stack. + int stack_size; + + // Heap allocated array of call frames will grow as needed. + CallFrame* frames; + + // Capacity of the frames array. + int frame_capacity; + + // Number of frame entry in frames. + int frame_count; + + // Runtime error initially NULL, heap allocated. + String* error; +}; + // Methods //////////////////////////////////////////////////////////////////// +// Initialize the object with it's default value. void varInitObject(Object* self, MSVM* vm, ObjectType type); +// Mark the reachable objects at the mark-and-sweep phase of the garbage +// collection. +void markObject(Object* self, MSVM* vm); + +// Mark the reachable values at the mark-and-sweep phase of the garbage +// collection. +void markValue(Var self, MSVM* vm); + // Instead use VAR_NUM(value) and AS_NUM(value) Var doubleToVar(double value); double varToDouble(Var value); @@ -301,10 +351,18 @@ Range* newRange(MSVM* vm, double from, double to); Script* newScript(MSVM* vm); // Allocate new Function object and return Function*. Parameter [name] should -// be the name in the Script's nametable. +// be the name in the Script's nametable. If the [owner] is NULL the function +// would be builtin function. For builtin function arity and the native +// function pointer would be initialized after calling this function. 
Function* newFunction(MSVM* vm, const char* name, int length, Script* owner, bool is_native); +// Allocate new Fiber object and return Fiber*. +Fiber* newFiber(MSVM* vm); + +// Release all the object owned by the [obj] including itself. +void freeObject(MSVM* vm, Object* obj); + // Utility functions ////////////////////////////////////////////////////////// // Returns the type name of the var [v]. diff --git a/src/vm.c b/src/vm.c index b908daa..2ed44b5 100644 --- a/src/vm.c +++ b/src/vm.c @@ -25,13 +25,6 @@ static void* defaultRealloc(void* memory, size_t new_size, void* user_data) { return realloc(memory, new_size); } -Fiber* newFiber(MSVM* vm) { - Fiber* fiber = ALLOCATE(vm, Fiber); - memset(fiber, 0, sizeof(Fiber)); - varInitObject(&fiber->_super, vm, OBJ_FIBER); - return fiber; -} - void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) { // TODO: Debug trace allocations here. @@ -44,7 +37,7 @@ void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) { if (new_size > 0 && self->bytes_allocated > self->next_gc) { vmCollectGarbage(self); } - + Function* f = (Function*)memory; if (new_size == 0) { free(memory); return NULL; @@ -53,14 +46,47 @@ void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) { return self->config.realloc_fn(memory, new_size, self->config.user_data); } +void msInitConfiguration(MSConfiguration* config) { + config->realloc_fn = defaultRealloc; + + // TODO: Handle Null functions before calling them. + config->error_fn = NULL; + config->write_fn = NULL; + + config->load_script_fn = NULL; + config->load_script_done_fn = NULL; + config->user_data = NULL; +} + +MSVM* msNewVM(MSConfiguration* config) { + MSVM* vm = (MSVM*)malloc(sizeof(MSVM)); + vmInit(vm, config); + return vm; +} + +void msFreeVM(MSVM* self) { + // TODO: Check if vm already freed. 
+ + Object* obj = self->first; + while (obj != NULL) { + Object* next = obj->next; + freeObject(self, obj); + obj = next; + } + + self->marked_list = (Object**)self->config.realloc_fn( + self->marked_list, 0, self->config.user_data); + self->config.realloc_fn(self, 0, self->config.user_data); +} + void vmInit(MSVM* self, MSConfiguration* config) { memset(self, 0, sizeof(MSVM)); self->config = *config; - self->gray_list_count = 0; - self->gray_list_capacity = 8; // TODO: refactor the magic '8' here. - self->gray_list = (Object**)self->config.realloc_fn( - NULL, sizeof(Object*) * self->gray_list_capacity, NULL); + self->marked_list_count = 0; + self->marked_list_capacity = 8; // TODO: refactor the magic '8' here. + self->marked_list = (Object**)self->config.realloc_fn( + NULL, sizeof(Object*) * self->marked_list_capacity, NULL); self->next_gc = 1024 * 1024 * 10; // TODO: // TODO: no need to initialize if already done by another vm. @@ -85,6 +111,33 @@ void vmCollectGarbage(MSVM* self) { // required to know the size of each object that'll be freeing. self->bytes_allocated = 0; + // Mark core objects (mostlikely builtin functions). + markCoreObjects(self); + + // Mark all the 'std' scripts. + for (int i = 0; i < self->std_count; i++) { + markObject(&(self->std_scripts[i]->_super), self); + } + + // Mark temp references. + for (int i = 0; i < self->temp_reference_count; i++) { + markObject(self->temp_reference[i], self); + } + + // Garbage collection triggered at the middle of a compilation. + if (self->compiler != NULL) { + compilerMarkObjects(self->compiler, self); + } + + // Garbage collection triggered at the middle of runtime. 
+ if (self->script != NULL) { + markObject(&self->script->_super, self); + } + + if (self->fiber != NULL) { + markObject(&self->fiber->_super, self); + } + TODO; } @@ -154,31 +207,11 @@ void vmReportError(MSVM* vm) { ASSERT(false, "TODO: create debug.h"); } -void MSInitConfiguration(MSConfiguration* config) { - config->realloc_fn = defaultRealloc; - - // TODO: Handle Null functions before calling them. - config->error_fn = NULL; - config->write_fn = NULL; - - config->load_script_fn = NULL; - config->load_script_done_fn = NULL; - config->user_data = NULL; -} - -MSVM* msNewVM(MSConfiguration* config) { - MSVM* vm = (MSVM*)malloc(sizeof(MSVM)); - vmInit(vm, config); - return vm; -} - MSInterpretResult msInterpret(MSVM* vm, const char* file) { Script* script = compileSource(vm, file); if (script == NULL) return RESULT_COMPILE_ERROR; - // TODO: The below assertion should be an error report. - ASSERT(vm->script_count + 1 < MAX_SCRIPT_CACHE, "Scripts cache out of bound."); - vm->scripts[vm->script_count++] = script; + vm->script = script; return vmRunScript(vm, script); } @@ -469,13 +502,15 @@ MSInterpretResult vmRunScript(MSVM* vm, Script* _script) { DISPATCH(); } - OPCODE(JUMP): { + OPCODE(JUMP): + { int offset = READ_SHORT(); ip += offset; DISPATCH(); } - OPCODE(LOOP): { + OPCODE(LOOP): + { int offset = READ_SHORT(); ip -= offset; DISPATCH(); diff --git a/src/vm.h b/src/vm.h index 48e0c10..6c3aed7 100644 --- a/src/vm.h +++ b/src/vm.h @@ -23,46 +23,6 @@ typedef enum { #undef OPCODE } Opcode; -typedef struct { - uint8_t* ip; //< Pointer to the next instruction byte code. - Function* fn; //< Function of the frame. - Var* rbp; //< Stack base pointer. (%rbp) -} CallFrame; - -struct Fiber { - Object _super; - - // The root function of the fiber. (For script it'll be the script's implicit - // body function). - Function* func; - - // The stack of the execution holding locals and temps. A heap will be - // allocated and grow as needed. 
- Var* stack; - - // The stack pointer (%rsp) pointing to the stack top. - Var* sp; - - // The stack base pointer of the current frame. It'll be updated before - // calling a native function. (`fiber->ret` === `curr_call_frame->rbp`). - Var* ret; - - // Size of the allocated stack. - int stack_size; - - // Heap allocated array of call frames will grow as needed. - CallFrame* frames; - - // Capacity of the frames array. - int frame_capacity; - - // Number of frame entry in frames. - int frame_count; - - // Runtime error initially NULL, heap allocated. - String* error; -}; - struct MSVM { // The first object in the link list of all heap allocated objects. @@ -76,9 +36,9 @@ struct MSVM { // In the tri coloring scheme gray is the working list. We recursively pop // from the list color it balck and add it's referenced objects to gray_list. - Object** gray_list; - int gray_list_count; - int gray_list_capacity; + Object** marked_list; + int marked_list_count; + int marked_list_capacity; // A stack of temporary object references to ensure that the object // doesn't garbage collected. @@ -88,29 +48,24 @@ struct MSVM { // VM's configurations. MSConfiguration config; - // Current compiler reference to mark it's heap allocated objects. + // Current compiler reference to mark it's heap allocated objects. Note that + // The compiler isn't heap allocated. Compiler* compiler; // Std scripts array. (TODO: assert "std" scripts doesn't have global vars). Script* std_scripts[MAX_SCRIPT_CACHE]; - - // Std scripts count. int std_count; // Execution variables //////////////////////////////////////////////////// - // Compiled script cache. - Script* scripts[MAX_SCRIPT_CACHE]; - - // Number of script cache. - int script_count; + // The root script of the runtime and it's one of the VM's reference root. + // VM is responsible to manage the memory (TODO: implement handlers). + Script* script; // Current fiber. 
Fiber* fiber; }; -Fiber* newFiber(MSVM* vm); - // A realloc wrapper which handles memory allocations of the VM. // - To allocate new memory pass NULL to parameter [memory] and 0 to // parameter [old_size] on failure it'll return NULL. diff --git a/test/main.c b/test/main.c index 38785cb..06c1bf3 100644 --- a/test/main.c +++ b/test/main.c @@ -9,7 +9,7 @@ #include "miniscript.h" void errorPrint(MSVM* vm, MSErrorType type, const char* file, int line, - const char* message) { + const char* message) { fprintf(stderr, "Error: %s\n\tat %s:%i\n", message, file, line); } @@ -68,7 +68,7 @@ int main(int argc, char** argv) { const char* source_path = argv[1]; MSConfiguration config; - MSInitConfiguration(&config); + msInitConfiguration(&config); config.error_fn = errorPrint; config.write_fn = writeFunction; config.load_script_fn = loadScript; @@ -76,6 +76,7 @@ int main(int argc, char** argv) { MSVM* vm = msNewVM(&config); MSInterpretResult result = msInterpret(vm, source_path); + msFreeVM(vm); return result; }