From 9dade8313ff0fe56ca5a6e34dd67ac773e860880 Mon Sep 17 00:00:00 2001 From: Thakee Nathees Date: Sat, 9 Apr 2022 00:23:12 +0530 Subject: [PATCH] closure, upvalue type were added. It's just the types that were added and only object creation and garbage collection of those types are implemented, the rest of the implementation is not part of this commit. --- README.md | 8 +++-- src/pk_core.c | 21 +++++++++++++ src/pk_value.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/pk_value.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ src/pk_vm.c | 3 ++ 5 files changed, 190 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e124f11..6e34f3e 100644 --- a/README.md +++ b/README.md @@ -107,8 +107,10 @@ If you weren't able to compile it, please report us by [opening an issue](https: - Mark W. Bailey, Nathan C. Weston (June 2001) Technical report. *Performance Benefits of Tail Recursion Removal in Procedural Languages* [online] Available at http://cs.hamilton.edu/~mbailey/pubs/techreps/TR-2001-2.pdf +- Roberto Ierusalimschy, Luiz Henrique de Figueiredo, Waldemar Celes *Closures in Lua* [pdf] Available at +https://www.cs.tufts.edu/~nr/cs257/archive/roberto-ierusalimschy/closures-draft.pdf (Accessed March 2022) + - Leonard schütz.(2020) *Dynamic Typing and NaN Boxing* [online] Available at https://leonardschuetz.ch/blog/nan-boxing/ (Accessed December 2020) -- Bob Nystrom.(2011) *Pratt Parsers: Expression Parsing Made Easy* [online] Avaliable at http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ (Accessed December 2020) - -- Carol E. (Wolf of Pace University), P. Oser. 
*The Shunting Yard Algorithm* [online] Available at http://mathcenter.oxford.emory.edu/site/cs171/shuntingYardAlgorithm/ (Accessed September 2020) +- Bob Nystrom.(2011) *Pratt Parsers: Expression Parsing Made Easy* [online] Available at +http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ (Accessed December 2020) diff --git a/src/pk_core.c b/src/pk_core.c index f3421d6..cccd219 100644 --- a/src/pk_core.c +++ b/src/pk_core.c @@ -418,6 +418,8 @@ static inline bool validateCond(PKVM* vm, bool condition, const char* err) { VALIDATE_ARG_OBJ(List, OBJ_LIST, "list") VALIDATE_ARG_OBJ(Map, OBJ_MAP, "map") VALIDATE_ARG_OBJ(Function, OBJ_FUNC, "function") + VALIDATE_ARG_OBJ(Closure, OBJ_CLOSURE, "closure") + VALIDATE_ARG_OBJ(Upvalue, OBJ_UPVALUE, "upvalue") VALIDATE_ARG_OBJ(Fiber, OBJ_FIBER, "fiber") VALIDATE_ARG_OBJ(Class, OBJ_CLASS, "class") @@ -1391,6 +1393,8 @@ Var varAdd(PKVM* vm, Var v1, Var v2) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -1602,6 +1606,8 @@ bool varContains(PKVM* vm, Var elem, Var container) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -1753,6 +1759,11 @@ Var varGetAttrib(PKVM* vm, Var on, String* attrib) { UNREACHABLE(); } + case OBJ_CLOSURE: + case OBJ_UPVALUE: + TODO; + UNREACHABLE(); + case OBJ_FIBER: { Fiber* fb = (Fiber*)obj; @@ -1878,6 +1889,12 @@ do { \ ERR_NO_ATTRIB(vm, on, attrib); return; + case OBJ_CLOSURE: + case OBJ_UPVALUE: + TODO; + ERR_NO_ATTRIB(vm, on, attrib); + return; + case OBJ_FIBER: ERR_NO_ATTRIB(vm, on, attrib); return; @@ -1967,6 +1984,8 @@ Var varGetSubscript(PKVM* vm, Var on, Var key) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -2017,6 +2036,8 @@ void varsetSubscript(PKVM* vm, Var on, Var key, Var value) { 
case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: diff --git a/src/pk_value.c b/src/pk_value.c index c2a3b28..b457064 100644 --- a/src/pk_value.c +++ b/src/pk_value.c @@ -248,6 +248,32 @@ static void popMarkedObjectsInternal(Object* obj, PKVM* vm) { } } break; + case OBJ_CLOSURE: + { + Closure* closure = (Closure*)obj; + markObject(vm, &closure->fn->_super); + for (int i = 0; i < closure->fn->upvalue_count; i++) { + markObject(vm, &(closure->upvalues[i]->_super)); + } + + vm->bytes_allocated += sizeof(Closure); + vm->bytes_allocated += sizeof(Upvalue*) * closure->fn->upvalue_count; + + } break; + + case OBJ_UPVALUE: + { + Upvalue* upvalue = (Upvalue*)obj; + + // We don't have to mark upvalue->ptr since the [ptr] points to a local + // in the stack, however we need to mark upvalue->closed in case it's + // closed. + markValue(vm, upvalue->closed); + + vm->bytes_allocated += sizeof(Upvalue); + + } break; + case OBJ_FIBER: { Fiber* fiber = (Fiber*)obj; @@ -421,7 +447,6 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, ASSERT(is_native, OOPS); func->name = name; func->owner = NULL; - func->is_native = is_native; } else { pkFunctionBufferWrite(&owner->functions, vm, func); @@ -430,9 +455,11 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, func->name = owner->names.data[name_index]->data; func->owner = owner; func->arity = -2; // -1 means variadic args. 
- func->is_native = is_native; } + func->is_native = is_native; + func->upvalue_count = 0; + if (is_native) { func->native = NULL; @@ -451,9 +478,37 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, return func; } +Closure* newClosure(PKVM* vm, Function* fn) { + Closure* closure = ALLOCATE_DYNAMIC(vm, Closure, + fn->upvalue_count, Upvalue*); + varInitObject(&closure->_super, vm, OBJ_CLOSURE); + + closure->fn = fn; + + for (int i = 0; i < fn->upvalue_count; i++) { + closure->upvalues[i] = NULL; + } + + return closure; +} + +Upvalue* newUpvalue(PKVM* vm, Var* value) { + Upvalue* upvalue = ALLOCATE(vm, Upvalue); + varInitObject(&upvalue->_super, vm, OBJ_UPVALUE); + + // A new upvalue starts open: [ptr] targets the variable, [closed] is null. + upvalue->ptr = value; + upvalue->closed = VAR_NULL; + upvalue->next = NULL; + return upvalue; +} + Fiber* newFiber(PKVM* vm, Function* fn) { Fiber* fiber = ALLOCATE(vm, Fiber); + + // Not sure why this memset is needed here. If it isn't, remove it. memset(fiber, 0, sizeof(Fiber)); + varInitObject(&fiber->_super, vm, OBJ_FIBER); fiber->state = FIBER_NEW; @@ -1045,6 +1098,10 @@ void freeObject(PKVM* vm, Object* self) { } } break; + case OBJ_CLOSURE: + case OBJ_UPVALUE: + break; + case OBJ_FIBER: { Fiber* fiber = (Fiber*)self; DEALLOCATE(vm, fiber->stack); @@ -1302,7 +1359,11 @@ const char* getPkVarTypeName(PkVarType type) { case PK_MAP: return "Map"; case PK_RANGE: return "Range"; case PK_SCRIPT: return "Script"; + + // TODO: since functions are not first class citizens anymore, remove it + // and add closure (maybe with the same name PK_FUNCTION). 
case PK_FUNCTION: return "Function"; + case PK_FIBER: return "Fiber"; case PK_CLASS: return "Class"; case PK_INST: return "Inst"; @@ -1319,6 +1380,8 @@ const char* getObjectTypeName(ObjectType type) { case OBJ_RANGE: return "Range"; case OBJ_SCRIPT: return "Script"; case OBJ_FUNC: return "Func"; + case OBJ_CLOSURE: return "Closure"; + case OBJ_UPVALUE: return "Upvalue"; case OBJ_FIBER: return "Fiber"; case OBJ_CLASS: return "Class"; case OBJ_INST: return "Inst"; @@ -1594,6 +1657,15 @@ static void _toStringInternal(PKVM* vm, const Var v, pkByteBuffer* buff, return; } + case OBJ_CLOSURE: { + const Closure* closure = (const Closure*)obj; + pkByteBufferAddString(buff, vm, "[Closure:", 9); + pkByteBufferAddString(buff, vm, closure->fn->name, + (uint32_t)strlen(closure->fn->name)); + pkByteBufferWrite(buff, vm, ']'); + return; + } + case OBJ_FIBER: { const Fiber* fb = (const Fiber*)obj; pkByteBufferAddString(buff, vm, "[Fiber:", 7); @@ -1603,6 +1675,12 @@ static void _toStringInternal(PKVM* vm, const Var v, pkByteBuffer* buff, return; } + case OBJ_UPVALUE: { + const Upvalue* upvalue = (const Upvalue*)obj; + pkByteBufferAddString(buff, vm, "[Upvalue]", 9); + return; + } + case OBJ_CLASS: { const Class* ty = (const Class*)obj; pkByteBufferAddString(buff, vm, "[Class:", 7); diff --git a/src/pk_value.h b/src/pk_value.h index 42d05e3..6107d22 100644 --- a/src/pk_value.h +++ b/src/pk_value.h @@ -190,6 +190,8 @@ typedef struct Map Map; typedef struct Range Range; typedef struct Script Script; typedef struct Function Function; +typedef struct Closure Closure; +typedef struct Upvalue Upvalue; typedef struct Fiber Fiber; typedef struct Class Class; typedef struct Instance Instance; @@ -216,6 +218,8 @@ typedef enum { OBJ_RANGE, OBJ_SCRIPT, OBJ_FUNC, + OBJ_CLOSURE, + OBJ_UPVALUE, OBJ_FIBER, OBJ_CLASS, OBJ_INST, @@ -308,6 +312,10 @@ struct Function { Script* owner; //< Owner script of the function. int arity; //< Number of argument the function expects. 
+ // Number of upvalues it uses, we're defining it here (and not in object Fn) + // to prevent checking is_native every time (which might be a bit faster). + int upvalue_count; + // Docstring of the function, currently it's just the C string literal // pointer, refactor this into String* so that we can support public // native functions to provide a docstring. @@ -320,6 +328,73 @@ struct Function { }; }; +// Closures are the first class citizen callables which wrap around a function +// [fn] which will be invoked each time the closure is called. In contrast to +// functions, closures have lexical scoping support via Upvalues. Consider the +// following function 'foo' +// +// def foo() +// bar = "bar" +// return func() ##< We'll be using the name 'baz' to identify this. +// print(bar) +// end +// end +// +// The inner literal function 'baz' needs the variable named 'bar', which only +// exists as long as the function 'foo' is active. Once 'foo' has returned, +// all of its local variables (including 'bar') will be popped and become +// inaccessible to 'baz'. +// +// This is where closures and upvalues come into the picture. A closure will +// use upvalues to hold a reference to the variable ('bar') and when the +// variable runs out of its scope / is popped from the stack, the upvalue will +// make its own copy of that variable to make sure that a closure referencing +// the variable via this upvalue still has access to the variable. +struct Closure { + Object _super; + + Function* fn; + Upvalue* upvalues[DYNAMIC_TAIL_ARRAY]; + +}; + +// In addition to locals (which live on the stack), a closure has upvalues. +// When a closure is created, an array of upvalues will be created for every +// closure. They work as a bridge between a closure and its non-local +// variable. When the variable is still on the stack, upvalue has a state of +// 'open' and will point to the variable on the stack. 
When the variable is +// popped from the stack, the upvalue will be changed to the state 'closed': +// it'll make a copy of that variable inside of it and the pointer will point +// to the copied (closed) variable. +// +// | | .----------------v +// | | | .-------------------. +// | | '-| u1 closed | (qux) | +// | | '-------------------' +// | | <- stack top +// | baz | .------------------. +// | bar | <---| u2 open | null | +// | foo | '------------------' +// '-------' +// stack +// +struct Upvalue { + Object _super; + + // The pointer which points to the non-local variable, once the variable is + // out of scope the [ptr] will point to the below value [closed]. + Var* ptr; + + // The copied value of the non-local. + Var closed; + + // To avoid creating multiple upvalues for a single variable we keep track + // of all the open upvalues on a linked list. Once we need an upvalue and if + // it already exists on the chain we re-use it, otherwise a new upvalue + // instance will be created (here [next] is the next upvalue on the chain). + Upvalue* next; +}; + typedef struct { const uint8_t* ip; //< Pointer to the next instruction byte code. const Function* fn; //< Function of the frame. @@ -449,6 +524,12 @@ Script* newScript(PKVM* vm, String* name, bool is_core); Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, bool is_native, const char* docstring); +// Allocate a new closure object and return it. +Closure* newClosure(PKVM* vm, Function* fn); + +// Allocate a new upvalue object for the [value] and return it. +Upvalue* newUpvalue(PKVM* vm, Var* value); + // Allocate new Fiber object around the function [fn] and return Fiber*. Fiber* newFiber(PKVM* vm, Function* fn); diff --git a/src/pk_vm.c b/src/pk_vm.c index 7eb4590..87e2ba8 100644 --- a/src/pk_vm.c +++ b/src/pk_vm.c @@ -1005,6 +1005,7 @@ L_vm_main_loop: DISPATCH(); } + // TODO: move this to a function in pk_core.c. 
OPCODE(ITER): { Var* value = (vm->fiber->sp - 1); @@ -1082,6 +1083,8 @@ L_vm_main_loop: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: