diff --git a/README.md b/README.md index e124f11..6e34f3e 100644 --- a/README.md +++ b/README.md @@ -107,8 +107,10 @@ If you weren't able to compile it, please report us by [opening an issue](https: - Mark W. Bailey, Nathan C. Weston (June 2001) Technical report. *Performance Benefits of Tail Recursion Removal in Procedural Languages* [online] Available at http://cs.hamilton.edu/~mbailey/pubs/techreps/TR-2001-2.pdf +- Roberto Ierusalimschy, Luiz Henrique de Figueiredo, Waldemar Celes *Closures in Lua* [pdf] Available at +https://www.cs.tufts.edu/~nr/cs257/archive/roberto-ierusalimschy/closures-draft.pdf (Accessed March 2022) + - Leonard schütz.(2020) *Dynamic Typing and NaN Boxing* [online] Available at https://leonardschuetz.ch/blog/nan-boxing/ (Accessed December 2020) -- Bob Nystrom.(2011) *Pratt Parsers: Expression Parsing Made Easy* [online] Avaliable at http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ (Accessed December 2020) - -- Carol E. (Wolf of Pace University), P. Oser. 
*The Shunting Yard Algorithm* [online] Available at http://mathcenter.oxford.emory.edu/site/cs171/shuntingYardAlgorithm/ (Accessed September 2020) +- Bob Nystrom.(2011) *Pratt Parsers: Expression Parsing Made Easy* [online] Available at +http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ (Accessed December 2020) diff --git a/src/pk_core.c b/src/pk_core.c index f3421d6..cccd219 100644 --- a/src/pk_core.c +++ b/src/pk_core.c @@ -418,6 +418,8 @@ static inline bool validateCond(PKVM* vm, bool condition, const char* err) { VALIDATE_ARG_OBJ(List, OBJ_LIST, "list") VALIDATE_ARG_OBJ(Map, OBJ_MAP, "map") VALIDATE_ARG_OBJ(Function, OBJ_FUNC, "function") + VALIDATE_ARG_OBJ(Closure, OBJ_CLOSURE, "closure") + VALIDATE_ARG_OBJ(Upvalue, OBJ_UPVALUE, "upvalue") VALIDATE_ARG_OBJ(Fiber, OBJ_FIBER, "fiber") VALIDATE_ARG_OBJ(Class, OBJ_CLASS, "class") @@ -1391,6 +1393,8 @@ Var varAdd(PKVM* vm, Var v1, Var v2) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -1602,6 +1606,8 @@ bool varContains(PKVM* vm, Var elem, Var container) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -1753,6 +1759,11 @@ Var varGetAttrib(PKVM* vm, Var on, String* attrib) { UNREACHABLE(); } + case OBJ_CLOSURE: + case OBJ_UPVALUE: + TODO; + UNREACHABLE(); + case OBJ_FIBER: { Fiber* fb = (Fiber*)obj; @@ -1878,6 +1889,12 @@ do { \ ERR_NO_ATTRIB(vm, on, attrib); return; + case OBJ_CLOSURE: + case OBJ_UPVALUE: + TODO; + ERR_NO_ATTRIB(vm, on, attrib); + return; + case OBJ_FIBER: ERR_NO_ATTRIB(vm, on, attrib); return; @@ -1967,6 +1984,8 @@ Var varGetSubscript(PKVM* vm, Var on, Var key) { case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: @@ -2017,6 +2036,8 @@ void varsetSubscript(PKVM* vm, Var on, Var key, Var value) { 
case OBJ_RANGE: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: diff --git a/src/pk_value.c b/src/pk_value.c index c2a3b28..b457064 100644 --- a/src/pk_value.c +++ b/src/pk_value.c @@ -248,6 +248,32 @@ static void popMarkedObjectsInternal(Object* obj, PKVM* vm) { } } break; + case OBJ_CLOSURE: + { + Closure* closure = (Closure*)obj; + markObject(vm, &closure->fn->_super); + for (int i = 0; i < closure->fn->upvalue_count; i++) { + markObject(vm, &(closure->upvalues[i]->_super)); + } + + vm->bytes_allocated += sizeof(Closure); + vm->bytes_allocated += sizeof(Upvalue*) * closure->fn->upvalue_count; + + } break; + + case OBJ_UPVALUE: + { + Upvalue* upvalue = (Upvalue*)obj; + + // We don't have to mark upvalue->ptr since the [ptr] points to a local + // in the stack, however we need to mark upvalue->closed incase if it's + // closed. + markValue(vm, upvalue->closed); + + vm->bytes_allocated += sizeof(Upvalue); + + } break; + case OBJ_FIBER: { Fiber* fiber = (Fiber*)obj; @@ -421,7 +447,6 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, ASSERT(is_native, OOPS); func->name = name; func->owner = NULL; - func->is_native = is_native; } else { pkFunctionBufferWrite(&owner->functions, vm, func); @@ -430,9 +455,11 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, func->name = owner->names.data[name_index]->data; func->owner = owner; func->arity = -2; // -1 means variadic args. 
- func->is_native = is_native; } + func->is_native = is_native; + func->upvalue_count = 0; + if (is_native) { func->native = NULL; @@ -451,9 +478,36 @@ Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, return func; } +Closure* newClosure(PKVM* vm, Function* fn) { + Closure* closure = ALLOCATE_DYNAMIC(vm, Closure, + fn->upvalue_count, Upvalue*); + varInitObject(&closure->_super, vm, OBJ_CLOSURE); + + closure->fn = fn; + + for (int i = 0; i < fn->upvalue_count; i++) { + closure->upvalues[i] = NULL; + } + + return closure; +} + +Upvalue* newUpvalue(PKVM* vm, Var* value) { + Upvalue* upvalue = ALLOCATE(vm, Upvalue); + varInitObject(&upvalue->_super, vm, OBJ_UPVALUE); + + upvalue->ptr = value; // Starts open: points at the captured variable. + upvalue->closed = VAR_NULL; // Filled in when the upvalue gets closed. + upvalue->next = NULL; + return upvalue; +} + Fiber* newFiber(PKVM* vm, Function* fn) { Fiber* fiber = ALLOCATE(vm, Fiber); + + // Not sure why this memset is needed here. If it isn't, remove it. memset(fiber, 0, sizeof(Fiber)); + varInitObject(&fiber->_super, vm, OBJ_FIBER); fiber->state = FIBER_NEW; @@ -1045,6 +1099,10 @@ void freeObject(PKVM* vm, Object* self) { } } break; + case OBJ_CLOSURE: + case OBJ_UPVALUE: + break; + case OBJ_FIBER: { Fiber* fiber = (Fiber*)self; DEALLOCATE(vm, fiber->stack); @@ -1302,7 +1360,11 @@ const char* getPkVarTypeName(PkVarType type) { case PK_MAP: return "Map"; case PK_RANGE: return "Range"; case PK_SCRIPT: return "Script"; + + // TODO: since functions are not first class citizens anymore, remove it + // and add closure (maybe with the same name PK_FUNCTION). 
 case PK_FUNCTION: return "Function"; + case PK_FIBER: return "Fiber"; case PK_CLASS: return "Class"; case PK_INST: return "Inst"; @@ -1319,6 +1381,8 @@ const char* getObjectTypeName(ObjectType type) { case OBJ_RANGE: return "Range"; case OBJ_SCRIPT: return "Script"; case OBJ_FUNC: return "Func"; + case OBJ_CLOSURE: return "Closure"; + case OBJ_UPVALUE: return "Upvalue"; case OBJ_FIBER: return "Fiber"; case OBJ_CLASS: return "Class"; case OBJ_INST: return "Inst"; @@ -1594,6 +1658,15 @@ static void _toStringInternal(PKVM* vm, const Var v, pkByteBuffer* buff, return; } + case OBJ_CLOSURE: { + const Closure* closure = (const Closure*)obj; + pkByteBufferAddString(buff, vm, "[Closure:", 9); + pkByteBufferAddString(buff, vm, closure->fn->name, + (uint32_t)strlen(closure->fn->name)); + pkByteBufferWrite(buff, vm, ']'); + return; + } + case OBJ_FIBER: { const Fiber* fb = (const Fiber*)obj; pkByteBufferAddString(buff, vm, "[Fiber:", 7); @@ -1603,6 +1676,11 @@ static void _toStringInternal(PKVM* vm, const Var v, pkByteBuffer* buff, return; } + case OBJ_UPVALUE: { + pkByteBufferAddString(buff, vm, "[Upvalue]", 9); + return; + } + case OBJ_CLASS: { const Class* ty = (const Class*)obj; pkByteBufferAddString(buff, vm, "[Class:", 7); diff --git a/src/pk_value.h b/src/pk_value.h index 42d05e3..6107d22 100644 --- a/src/pk_value.h +++ b/src/pk_value.h @@ -190,6 +190,8 @@ typedef struct Map Map; typedef struct Range Range; typedef struct Script Script; typedef struct Function Function; +typedef struct Closure Closure; +typedef struct Upvalue Upvalue; typedef struct Fiber Fiber; typedef struct Class Class; typedef struct Instance Instance; @@ -216,6 +218,8 @@ typedef enum { OBJ_RANGE, OBJ_SCRIPT, OBJ_FUNC, + OBJ_CLOSURE, + OBJ_UPVALUE, OBJ_FIBER, OBJ_CLASS, OBJ_INST, @@ -308,6 +312,10 @@ struct Function { Script* owner; //< Owner script of the function. int arity; //< Number of argument the function expects. 
+ // Number of upvalues it uses. We're defining it here (and not in object Fn) + // to avoid checking is_native every time (which might be a bit faster). + int upvalue_count; + // Docstring of the function, currently it's just the C string literal // pointer, refactor this into String* so that we can support public // native functions to provide a docstring. @@ -320,6 +328,73 @@ struct Function { }; }; +// Closures are the first class citizen callables which wrap around a function +// [fn] which will be invoked each time the closure is called. In contrast to +// functions, closures have lexical scoping support via Upvalues. Consider the +// following function 'foo': +// +// def foo() +// bar = "bar" +// return func() ##< We'll be using the name 'baz' to identify this. +// print(bar) +// end +// end +// +// The inner literal function 'baz' needs the variable 'bar', which only exists +// as long as the function 'foo' is active. Once the function 'foo' returns +// all of its local variables (including 'bar') will be popped and 'baz' will +// become inaccessible. +// +// This is where closures and upvalues come into picture. A closure will use +// upvalues to hold a reference to the variable ('bar') and when the variable +// runs out of its scope / is popped from stack, the upvalue will make its own +// copy of that variable to make sure that a closure referencing the variable +// via this upvalue still has access to the variable. +struct Closure { + Object _super; + + Function* fn; + Upvalue* upvalues[DYNAMIC_TAIL_ARRAY]; + +}; + +// In addition to locals (which live on the stack), a closure has upvalues. +// When a closure is created, an array of upvalues will be created for every +// closure. They work as a bridge between a closure and its non-local +// variable. When the variable is still on the stack, upvalue has a state of +// 'open' and will point to the variable on the stack. 
When the variable is +// popped from the stack, the upvalue will be changed to the state 'closed': +// it'll make a copy of that variable inside of it and the pointer will point +// to the copied (closed) variable. +// +// | | .----------------v +// | | | .-------------------. +// | | '-| u1 closed | (qux) | +// | | '-------------------' +// | | <- stack top +// | baz | .------------------. +// | bar | <---| u2 open | null | +// | foo | '------------------' +// '-------' +// stack +// +struct Upvalue { + Object _super; + + // The pointer which points to the non-local variable. Once the variable is + // out of scope the [ptr] will point to the below value [closed]. + Var* ptr; + + // The copied value of the non-local. + Var closed; + + // To prevent multiple upvalues created for a single variable we keep track + // of all the open upvalues on a linked list. Once we need an upvalue and if + // it already exists on the chain we re-use it, otherwise a new upvalue + // instance will be created (here [next] is the next upvalue on the chain). + Upvalue* next; +}; + typedef struct { const uint8_t* ip; //< Pointer to the next instruction byte code. const Function* fn; //< Function of the frame. @@ -449,6 +524,12 @@ Script* newScript(PKVM* vm, String* name, bool is_core); Function* newFunction(PKVM* vm, const char* name, int length, Script* owner, bool is_native, const char* docstring); +// Allocate a new closure object and return it. +Closure* newClosure(PKVM* vm, Function* fn); + +// Allocate a new upvalue object for the [value] and return it. +Upvalue* newUpvalue(PKVM* vm, Var* value); + // Allocate new Fiber object around the function [fn] and return Fiber*. Fiber* newFiber(PKVM* vm, Function* fn); diff --git a/src/pk_vm.c b/src/pk_vm.c index 7eb4590..87e2ba8 100644 --- a/src/pk_vm.c +++ b/src/pk_vm.c @@ -1005,6 +1005,7 @@ L_vm_main_loop: DISPATCH(); } + // TODO: move this to a function in pk_core.c. 
OPCODE(ITER): { Var* value = (vm->fiber->sp - 1); @@ -1082,6 +1083,8 @@ L_vm_main_loop: case OBJ_SCRIPT: case OBJ_FUNC: + case OBJ_CLOSURE: + case OBJ_UPVALUE: case OBJ_FIBER: case OBJ_CLASS: case OBJ_INST: