early root marking implemented

This commit is contained in:
Thakee Nathees 2021-04-26 15:04:30 +05:30
parent 34716b4f6b
commit bd4642be70
10 changed files with 328 additions and 132 deletions

View File

@ -113,7 +113,7 @@ typedef struct {
// Initialize the configuration and set ALL of its values to the defaults.
// Call this before setting any particular field of it.
void MSInitConfiguration(MSConfiguration* config);
void msInitConfiguration(MSConfiguration* config);
typedef enum {
RESULT_SUCCESS = 0,
@ -124,6 +124,9 @@ typedef enum {
// Allocate, initialize and return a new VM.
MSVM* msNewVM(MSConfiguration* config);
// Clean the VM and dispose all the resources allocated by the VM.
void msFreeVM(MSVM* vm);
// Compile and execute the file at the given path.
MSInterpretResult msInterpret(MSVM* vm, const char* file);

View File

@ -173,7 +173,7 @@ static _Keyword _keywords[] = {
{ "continue", 8, TK_CONTINUE },
{ "return", 6, TK_RETURN },
{ NULL, (TokenType)(0) }, // Sentinal to mark the end of the array
{ NULL, 0, (TokenType)(0) }, // Sentinal to mark the end of the array
};
typedef struct {
@ -195,7 +195,6 @@ typedef struct {
// Precedence parsing references:
// https://en.wikipedia.org/wiki/Shunting-yard_algorithm
// TODO: I should explicitly state wren-lang as a reference "globaly".
typedef enum {
PREC_NONE,
@ -299,9 +298,7 @@ struct Compiler {
int var_count; //< Number of locals in [variables].
int global_count; //< Number of globals in [variables].
int stack_size; //< Current size including locals ind temps.
// TODO: compiler should mark Script* below not to be garbage collected.
int stack_size; //< Current size including locals ind temps.=
Script* script; //< Current script.
Loop* loop; //< Current loop.
@ -1035,7 +1032,8 @@ static void exprName(Compiler* compiler, bool can_assign) {
if (can_assign && matchAssignment(parser)) {
TokenType assignment = parser->previous.type;
if (assignment != TK_EQ) {
emitPushVariable(compiler, result.index, result.type == NAME_GLOBAL_VAR);
emitPushVariable(compiler, result.index,
result.type == NAME_GLOBAL_VAR);
compileExpression(compiler);
switch (assignment) {
@ -1800,3 +1798,17 @@ Script* compileSource(MSVM* vm, const char* path) {
if (compiler.parser.has_errors) return NULL;
return script;
}
///////////////////////////////////////////////////////////////////////////////
// Mark the objects held by an in-progress compilation so that a garbage
// collection triggered in the middle of compiling keeps them.
void compilerMarkObjects(Compiler* compiler, MSVM* vm) {
  // The script that is currently being compiled.
  markObject(&compiler->script->_super, vm);

  // Values carried by the parser's tokens (string literals): they have not
  // been written to the script's literal buffer yet.
  const Var token_values[] = {
    compiler->parser.current.value,
    compiler->parser.previous.value,
    compiler->parser.next.value,
  };
  for (int i = 0; i < 3; i++) markValue(token_values[i], vm);
}

View File

@ -13,4 +13,6 @@ typedef struct Compiler Compiler;
Script* compileSource(MSVM* vm, const char* path);
void compilerMarkObjects(Compiler* compiler, MSVM* vm);
#endif // COMPILER_H

View File

@ -14,7 +14,7 @@
typedef struct {
const char* name; //< Name of the function.
int length; //< Length of the name.
Function fn; //< Native function pointer.
Function* fn; //< Native function pointer.
} _BuiltinFn;
// Count of builtin function +1 for termination.
@ -24,26 +24,23 @@ typedef struct {
#define _AS_INTEGER(var) (int32_t)trunc(AS_NUM(var))
// Array of all builtin functions.
_BuiltinFn builtins[BUILTIN_COUNT];
static _BuiltinFn builtins[BUILTIN_COUNT];
static int builtins_count = 0;
static void initializeBuiltinFN(MSVM* vm, _BuiltinFn* bfn, const char* name,
int length, int arity, MiniScriptNativeFn ptr) {
bfn->name = name;
bfn->length = length;
varInitObject(&bfn->fn._super, vm, OBJ_FUNC);
bfn->fn.name = name;
bfn->fn.arity = arity;
bfn->fn.owner = NULL;
bfn->fn.is_native = true;
bfn->fn.native = ptr;
bfn->fn = newFunction(vm, name, length, NULL, true);
bfn->fn->arity = arity;
bfn->fn->native = ptr;
}
int findBuiltinFunction(const char* name, int length) {
for (int i = 0; i < BUILTIN_COUNT; i++) {
if (builtins[i].name == NULL) return -1;
if (length == builtins[i].length && strncmp(name, builtins[i].name, length) == 0) {
for (int i = 0; i < builtins_count; i++) {
if (length == builtins[i].length &&
strncmp(name, builtins[i].name, length) == 0) {
return i;
}
}
@ -118,12 +115,12 @@ static inline bool validateIndex(MSVM* vm, int32_t index, int32_t size,
} while (false)
Function* getBuiltinFunction(int index) {
ASSERT(index < BUILTIN_COUNT, "Index out of bound.");
return &builtins[index].fn;
ASSERT_INDEX(index, builtins_count);
return builtins[index].fn;
}
const char* getBuiltinFunctionName(int index) {
ASSERT(index < BUILTIN_COUNT, "Index out of bound.");
ASSERT_INDEX(index, builtins_count);
return builtins[index].name;
}
@ -219,16 +216,17 @@ void stdOsClock(MSVM* vm) {
/*****************************************************************************/
void initializeCore(MSVM* vm) {
int i = 0; //< Iterate through builtins.
ASSERT(builtins_count == 0, "Initialize core only once.");
#define INITALIZE_BUILTIN_FN(name, fn, argc) \
initializeBuiltinFN(vm, &builtins[i++], name, (int)strlen(name), argc, fn);
#define INITALIZE_BUILTIN_FN(name, fn, argc) \
initializeBuiltinFN(vm, &builtins[builtins_count++], name, \
(int)strlen(name), argc, fn);
// Initialize builtin functions.
INITALIZE_BUILTIN_FN("is_null", coreIsNull, 1);
INITALIZE_BUILTIN_FN("is_bool", coreIsBool, 1);
INITALIZE_BUILTIN_FN("is_num", coreIsNum, 1);
INITALIZE_BUILTIN_FN("is_string", coreIsString, 1);
INITALIZE_BUILTIN_FN("is_list", coreIsList, 1);
INITALIZE_BUILTIN_FN("is_map", coreIsMap, 1);
@ -236,13 +234,13 @@ void initializeCore(MSVM* vm) {
INITALIZE_BUILTIN_FN("is_function", coreIsFunction, 1);
INITALIZE_BUILTIN_FN("is_script", coreIsScript, 1);
INITALIZE_BUILTIN_FN("is_userobj", coreIsUserObj, 1);
INITALIZE_BUILTIN_FN("to_string", coreToString, 1);
INITALIZE_BUILTIN_FN("print", corePrint, -1);
INITALIZE_BUILTIN_FN("import", coreImport, 1);
// Sentinal to mark the end of the array.
initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL);
//initializeBuiltinFN(vm, &builtins[i], NULL, 0, 0, NULL);
// Make STD scripts.
Script* std; // A temporary pointer to the current std script.
@ -273,6 +271,12 @@ void initializeCore(MSVM* vm) {
STD_ADD_FUNCTION("clock", stdOsClock, 0);
}
// Mark the function object of every registered builtin (the first
// [builtins_count] entries of the builtins array).
void markCoreObjects(MSVM* vm) {
  _BuiltinFn* entry = builtins;
  _BuiltinFn* end = builtins + builtins_count;
  for (; entry < end; entry++) {
    markObject(&entry->fn->_super, vm);
  }
}
/*****************************************************************************/
/* OPERATORS */
/*****************************************************************************/
@ -287,7 +291,32 @@ Var varAdd(MSVM* vm, Var v1, Var v2) {
return VAR_NULL;
}
TODO; //string addition/ array addition etc.
if (IS_OBJ(v1) && IS_OBJ(v2)) {
Object *o1 = AS_OBJ(v1), *o2 = AS_OBJ(v2);
switch (o1->type) {
case OBJ_STRING:
{
if (o2->type == OBJ_STRING) {
TODO; // Implement String.format('@@', s1, s2);
}
} break;
case OBJ_LIST:
case OBJ_MAP:
case OBJ_RANGE:
case OBJ_SCRIPT:
case OBJ_FUNC:
case OBJ_FIBER:
case OBJ_USER:
TODO;
}
}
msSetRuntimeError(vm, "Unsupported operand types for operator '-' "
"%s and %s", varTypeName(v1), varTypeName(v2));
return VAR_NULL;
}
@ -300,6 +329,8 @@ Var varSubtract(MSVM* vm, Var v1, Var v2) {
return VAR_NULL;
}
TODO; // for user objects call vm.config.sub_userobj_sub(handles).
msSetRuntimeError(vm, "Unsupported operand types for operator '-' "
"%s and %s", varTypeName(v1), varTypeName(v2));

View File

@ -19,6 +19,9 @@
// 2. Destroy scripts buffer only when the last VM die.
void initializeCore(MSVM* vm);
// Mark the heap allocated core objects during the mark phase.
void markCoreObjects(MSVM* vm);
// Find the builtin function by name and return its index in the builtins
// array; returns -1 if not found.
int findBuiltinFunction(const char* name, int length);

118
src/var.c
View File

@ -40,9 +40,31 @@ const char* msAsString(MSVM* vm, Var value) {
// Initialize the object header of [self]: record its [type], start it out
// unmarked, and link it in as the new head of the VM's object list.
void varInitObject(Object* self, MSVM* vm, ObjectType type) {
  self->is_marked = false;
  self->type = type;

  // Prepend to the VM's linked list of objects (vm->first is the head).
  self->next = vm->first;
  vm->first = self;
}
// Mark [self] and append it to the VM's marked list. NULL objects and
// objects that are already marked are ignored.
void markObject(Object* self, MSVM* vm) {
  if (self == NULL) return;
  if (self->is_marked) return;
  self->is_marked = true;

  // The object goes on the VM's marked list so that the objects it references
  // can be marked later. When the list is full its capacity is doubled first.
  // NOTE(review): the pointer returned by realloc_fn is stored without a NULL
  // check, so an allocation failure is not handled here.
  if (vm->marked_list_count >= vm->marked_list_capacity) {
    vm->marked_list_capacity *= 2;
    size_t list_bytes = vm->marked_list_capacity * sizeof(Object*);
    vm->marked_list = (Object**)vm->config.realloc_fn(
        vm->marked_list, list_bytes, vm->config.user_data);
  }

  vm->marked_list[vm->marked_list_count] = self;
  vm->marked_list_count += 1;
}
// Mark the object held by [self]; values that are not objects are ignored.
void markValue(Var self, MSVM* vm) {
  if (IS_OBJ(self)) {
    markObject(AS_OBJ(self), vm);
  }
}
#if VAR_NAN_TAGGING
@ -134,17 +156,26 @@ Function* newFunction(MSVM* vm, const char* name, int length, Script* owner,
Function* func = ALLOCATE(vm, Function);
varInitObject(&func->_super, vm, OBJ_FUNC);
// Add the name in the script's function buffer.
String* name_ptr;
vmPushTempRef(vm, &func->_super);
functionBufferWrite(&owner->functions, vm, func);
nameTableAdd(&owner->function_names, vm, name, length, &name_ptr);
vmPopTempRef(vm);
if (owner == NULL) {
ASSERT(is_native, OOPS);
func->name = name;
func->owner = NULL;
func->is_native = is_native;
} else {
// Add the name in the script's function buffer.
String* name_ptr;
vmPushTempRef(vm, &func->_super);
functionBufferWrite(&owner->functions, vm, func);
nameTableAdd(&owner->function_names, vm, name, length, &name_ptr);
vmPopTempRef(vm);
func->name = name_ptr->data;
func->owner = owner;
func->arity = -2; // -1 means variadic args.
func->is_native = is_native;
}
func->name = name_ptr->data;
func->owner = owner;
func->arity = -2; // -1 means variadic args.
func->is_native = is_native;
if (is_native) {
func->native = NULL;
@ -159,6 +190,71 @@ Function* newFunction(MSVM* vm, const char* name, int length, Script* owner,
return func;
}
// Allocate a Fiber, zero all of its fields and initialize its object header
// as OBJ_FIBER. Returns the new fiber.
Fiber* newFiber(MSVM* vm) {
  Fiber* self = ALLOCATE(vm, Fiber);

  // Zero first: varInitObject() then fills in the header fields.
  memset(self, 0, sizeof *self);
  varInitObject(&self->_super, vm, OBJ_FIBER);

  return self;
}
// Release the storage owned directly by [obj] and then [obj] itself.
void freeObject(MSVM* vm, Object* obj) {
  // TODO: Debug trace memory here.

  // Only the object's own buffers are released here; the objects it refers to
  // are not freed recursively. Example: a List's `elements` is a VarBuffer
  // whose heap allocated array is cleared below, while the element objects
  // themselves are left alone: if they are unmarked they will be removed by
  // the sweeping phase of the garbage collection.
  switch (obj->type) {
    // Nothing is released for these types before the object itself.
    case OBJ_STRING:
    case OBJ_RANGE:
    case OBJ_USER:
      break;

    case OBJ_LIST: {
      List* list = (List*)obj;
      varBufferClear(&list->elements, vm);
      break;
    }

    case OBJ_MAP:
      TODO;
      break;

    case OBJ_SCRIPT: {
      Script* script = (Script*)obj;
      varBufferClear(&script->globals, vm);
      nameTableClear(&script->global_names, vm);
      varBufferClear(&script->literals, vm);
      functionBufferClear(&script->functions, vm);
      nameTableClear(&script->function_names, vm);
      stringBufferClear(&script->names, vm);
      break;
    }

    case OBJ_FUNC: {
      Function* function = (Function*)obj;
      // The opcode and line buffers are only cleared for non-native functions.
      if (function->is_native) break;
      byteBufferClear(&function->fn->opcodes, vm);
      intBufferClear(&function->fn->oplines, vm);
      break;
    }

    case OBJ_FIBER: {
      Fiber* fiber = (Fiber*)obj;
      DEALLOCATE(vm, fiber->stack);
      DEALLOCATE(vm, fiber->frames);
      break;
    }
  }

  DEALLOCATE(vm, obj);
}
// Utility functions //////////////////////////////////////////////////////////
const char* varTypeName(Var v) {

View File

@ -136,7 +136,7 @@
// Check types.
#define IS_CONST(value) ((value & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value) ((value) == VAR_NULL)
#define IS_UNDEF(value) ((value) == VAR_UNDEF)
#define IS_UNDEF(value) ((value) == VAR_UNDEFINED)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value) ((value) == VAR_TRUE)
#define IS_BOOL(value) (IS_TRUE(value) || IS_FALSE(value))
@ -210,6 +210,7 @@ typedef enum /* ObjectType */ {
// Base struct for all heap allocated objects.
struct Object {
ObjectType type; //< Type of the object in \ref var_Object_Type.
bool is_marked; //< Marked when garbage collection's marking phase.
//Class* is; //< The class the object IS. // No OOP in MS.
Object* next; //< Next object in the heap allocated link list.
@ -280,10 +281,59 @@ struct Function {
};
};
// One call frame: records the instruction pointer, the function and the stack
// base pointer of a frame. A Fiber stores these in its `frames` array.
typedef struct {
uint8_t* ip; //< Pointer to the next instruction byte code.
Function* fn; //< Function of the frame.
Var* rbp; //< Stack base pointer. (%rbp)
} CallFrame;
// A fiber object: a root function together with its value stack, its call
// frames and a runtime error slot.
struct Fiber {
// Object header; Fiber is used as an Object through this member.
Object _super;
// The root function of the fiber. (For a script it'll be the script's
// implicit body function).
Function* func;
// The stack of the execution holding locals and temps. It is heap allocated
// and grows as needed.
Var* stack;
// The stack pointer (%rsp) pointing to the stack top.
Var* sp;
// The stack base pointer of the current frame. It'll be updated before
// calling a native function. (`fiber->ret` === `curr_call_frame->rbp`).
Var* ret;
// Size of the allocated stack.
int stack_size;
// Heap allocated array of call frames; grows as needed.
CallFrame* frames;
// Capacity of the frames array.
int frame_capacity;
// Number of frame entries in frames.
int frame_count;
// Runtime error, initially NULL, heap allocated.
String* error;
};
// Methods ////////////////////////////////////////////////////////////////////
// Initialize the object with it's default value.
void varInitObject(Object* self, MSVM* vm, ObjectType type);
// Mark the reachable objects at the mark-and-sweep phase of the garbage
// collection.
void markObject(Object* self, MSVM* vm);
// Mark the reachable values at the mark-and-sweep phase of the garbage
// collection.
void markValue(Var self, MSVM* vm);
// Instead use VAR_NUM(value) and AS_NUM(value)
Var doubleToVar(double value);
double varToDouble(Var value);
@ -301,10 +351,18 @@ Range* newRange(MSVM* vm, double from, double to);
Script* newScript(MSVM* vm);
// Allocate new Function object and return Function*. Parameter [name] should
// be the name in the Script's nametable.
// be the name in the Script's nametable. If [owner] is NULL the function is a
// builtin function. For a builtin function the arity and the native function
// pointer are initialized by the caller after calling this function.
Function* newFunction(MSVM* vm, const char* name, int length, Script* owner,
bool is_native);
// Allocate new Fiber object and return Fiber*.
Fiber* newFiber(MSVM* vm);
// Release all the resources owned by [obj], including the object itself.
void freeObject(MSVM* vm, Object* obj);
// Utility functions //////////////////////////////////////////////////////////
// Returns the type name of the var [v].

105
src/vm.c
View File

@ -25,13 +25,6 @@ static void* defaultRealloc(void* memory, size_t new_size, void* user_data) {
return realloc(memory, new_size);
}
Fiber* newFiber(MSVM* vm) {
Fiber* fiber = ALLOCATE(vm, Fiber);
memset(fiber, 0, sizeof(Fiber));
varInitObject(&fiber->_super, vm, OBJ_FIBER);
return fiber;
}
void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) {
// TODO: Debug trace allocations here.
@ -44,7 +37,7 @@ void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) {
if (new_size > 0 && self->bytes_allocated > self->next_gc) {
vmCollectGarbage(self);
}
Function* f = (Function*)memory;
if (new_size == 0) {
free(memory);
return NULL;
@ -53,14 +46,47 @@ void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) {
return self->config.realloc_fn(memory, new_size, self->config.user_data);
}
// Fill [config] with the defaults: defaultRealloc as the allocator, and NULL
// for the user data and for every host callback.
void msInitConfiguration(MSConfiguration* config) {
  // Allocator and the opaque pointer handed back to it.
  config->realloc_fn = defaultRealloc;
  config->user_data = NULL;

  // Host callbacks start out unset.
  // TODO: Handle Null functions before calling them.
  config->load_script_done_fn = NULL;
  config->load_script_fn = NULL;
  config->write_fn = NULL;
  config->error_fn = NULL;
}
// Allocate and initialize a new VM from [config].
//
// The VM is allocated with config->realloc_fn (not malloc) so that it matches
// msFreeVM(), which releases the VM through the same realloc_fn. Returns NULL
// if the allocation fails; otherwise returns the initialized VM.
MSVM* msNewVM(MSConfiguration* config) {
  MSVM* vm = (MSVM*)config->realloc_fn(NULL, sizeof(MSVM), config->user_data);

  // Don't hand an unallocated VM to vmInit(), which writes to it immediately.
  if (vm == NULL) return NULL;

  vmInit(vm, config);
  return vm;
}
// Free every object in the VM's object list, then the marked list and finally
// the VM itself. Passing NULL is a no-op.
void msFreeVM(MSVM* self) {
  if (self == NULL) return;

  // TODO: Check if vm already freed.
  // NOTE(review): builtin functions are created with newFunction(), which
  // links them into this list, so they are freed here while the static
  // builtins table still holds their pointers -- confirm this is intended.

  // Read `next` before freeing: freeObject() releases the node itself.
  Object* obj = self->first;
  while (obj != NULL) {
    Object* next = obj->next;
    freeObject(self, obj);
    obj = next;
  }

  // A size of 0 asks realloc_fn to release the memory. The result is not
  // stored back because the VM is released right after.
  self->config.realloc_fn(self->marked_list, 0, self->config.user_data);
  self->config.realloc_fn(self, 0, self->config.user_data);
}
void vmInit(MSVM* self, MSConfiguration* config) {
memset(self, 0, sizeof(MSVM));
self->config = *config;
self->gray_list_count = 0;
self->gray_list_capacity = 8; // TODO: refactor the magic '8' here.
self->gray_list = (Object**)self->config.realloc_fn(
NULL, sizeof(Object*) * self->gray_list_capacity, NULL);
self->marked_list_count = 0;
self->marked_list_capacity = 8; // TODO: refactor the magic '8' here.
self->marked_list = (Object**)self->config.realloc_fn(
NULL, sizeof(Object*) * self->marked_list_capacity, NULL);
self->next_gc = 1024 * 1024 * 10; // TODO:
// TODO: no need to initialize if already done by another vm.
@ -85,6 +111,33 @@ void vmCollectGarbage(MSVM* self) {
// required to know the size of each object that'll be freeing.
self->bytes_allocated = 0;
// Mark core objects (mostlikely builtin functions).
markCoreObjects(self);
// Mark all the 'std' scripts.
for (int i = 0; i < self->std_count; i++) {
markObject(&(self->std_scripts[i]->_super), self);
}
// Mark temp references.
for (int i = 0; i < self->temp_reference_count; i++) {
markObject(self->temp_reference[i], self);
}
// Garbage collection triggered at the middle of a compilation.
if (self->compiler != NULL) {
compilerMarkObjects(self->compiler, self);
}
// Garbage collection triggered at the middle of runtime.
if (self->script != NULL) {
markObject(&self->script->_super, self);
}
if (self->fiber != NULL) {
markObject(&self->fiber->_super, self);
}
TODO;
}
@ -154,31 +207,11 @@ void vmReportError(MSVM* vm) {
ASSERT(false, "TODO: create debug.h");
}
void MSInitConfiguration(MSConfiguration* config) {
config->realloc_fn = defaultRealloc;
// TODO: Handle Null functions before calling them.
config->error_fn = NULL;
config->write_fn = NULL;
config->load_script_fn = NULL;
config->load_script_done_fn = NULL;
config->user_data = NULL;
}
MSVM* msNewVM(MSConfiguration* config) {
MSVM* vm = (MSVM*)malloc(sizeof(MSVM));
vmInit(vm, config);
return vm;
}
MSInterpretResult msInterpret(MSVM* vm, const char* file) {
Script* script = compileSource(vm, file);
if (script == NULL) return RESULT_COMPILE_ERROR;
// TODO: The below assertion should be an error report.
ASSERT(vm->script_count + 1 < MAX_SCRIPT_CACHE, "Scripts cache out of bound.");
vm->scripts[vm->script_count++] = script;
vm->script = script;
return vmRunScript(vm, script);
}
@ -469,13 +502,15 @@ MSInterpretResult vmRunScript(MSVM* vm, Script* _script) {
DISPATCH();
}
OPCODE(JUMP): {
OPCODE(JUMP):
{
int offset = READ_SHORT();
ip += offset;
DISPATCH();
}
OPCODE(LOOP): {
OPCODE(LOOP):
{
int offset = READ_SHORT();
ip -= offset;
DISPATCH();

View File

@ -23,46 +23,6 @@ typedef enum {
#undef OPCODE
} Opcode;
typedef struct {
uint8_t* ip; //< Pointer to the next instruction byte code.
Function* fn; //< Function of the frame.
Var* rbp; //< Stack base pointer. (%rbp)
} CallFrame;
struct Fiber {
Object _super;
// The root function of the fiber. (For script it'll be the script's implicit
// body function).
Function* func;
// The stack of the execution holding locals and temps. A heap will be
// allocated and grow as needed.
Var* stack;
// The stack pointer (%rsp) pointing to the stack top.
Var* sp;
// The stack base pointer of the current frame. It'll be updated before
// calling a native function. (`fiber->ret` === `curr_call_frame->rbp`).
Var* ret;
// Size of the allocated stack.
int stack_size;
// Heap allocated array of call frames will grow as needed.
CallFrame* frames;
// Capacity of the frames array.
int frame_capacity;
// Number of frame entry in frames.
int frame_count;
// Runtime error initially NULL, heap allocated.
String* error;
};
struct MSVM {
// The first object in the link list of all heap allocated objects.
@ -76,9 +36,9 @@ struct MSVM {
// In the tri coloring scheme gray is the working list. We recursively pop
// from the list, color it black and add its referenced objects to gray_list.
Object** gray_list;
int gray_list_count;
int gray_list_capacity;
Object** marked_list;
int marked_list_count;
int marked_list_capacity;
// A stack of temporary object references to ensure that the object
// doesn't garbage collected.
@ -88,29 +48,24 @@ struct MSVM {
// VM's configurations.
MSConfiguration config;
// Current compiler reference to mark it's heap allocated objects.
// Current compiler reference to mark its heap allocated objects. Note that
// the compiler isn't heap allocated.
Compiler* compiler;
// Std scripts array. (TODO: assert "std" scripts doesn't have global vars).
Script* std_scripts[MAX_SCRIPT_CACHE];
// Std scripts count.
int std_count;
// Execution variables ////////////////////////////////////////////////////
// Compiled script cache.
Script* scripts[MAX_SCRIPT_CACHE];
// Number of script cache.
int script_count;
// The root script of the runtime; it is one of the VM's root references.
// The VM is responsible for managing the memory (TODO: implement handlers).
Script* script;
// Current fiber.
Fiber* fiber;
};
Fiber* newFiber(MSVM* vm);
// A realloc wrapper which handles memory allocations of the VM.
// - To allocate new memory pass NULL to parameter [memory] and 0 to
// parameter [old_size] on failure it'll return NULL.

View File

@ -9,7 +9,7 @@
#include "miniscript.h"
void errorPrint(MSVM* vm, MSErrorType type, const char* file, int line,
const char* message) {
const char* message) {
fprintf(stderr, "Error: %s\n\tat %s:%i\n", message, file, line);
}
@ -68,7 +68,7 @@ int main(int argc, char** argv) {
const char* source_path = argv[1];
MSConfiguration config;
MSInitConfiguration(&config);
msInitConfiguration(&config);
config.error_fn = errorPrint;
config.write_fn = writeFunction;
config.load_script_fn = loadScript;
@ -76,6 +76,7 @@ int main(int argc, char** argv) {
MSVM* vm = msNewVM(&config);
MSInterpretResult result = msInterpret(vm, source_path);
msFreeVM(vm);
return result;
}