pocketlang/src/var.h

324 lines
10 KiB
C
Raw Normal View History

2021-02-07 15:40:00 +08:00
/*
* Copyright (c) 2020-2021 Thakee Nathees
* Licensed under: MIT License
*/
#ifndef VAR_H
#define VAR_H
/** @file
* A simple single header dynamic type system library for small dynamically
* typed languages using a technique called NaN-tagging (optional). The method
* is inspired by wren (https://wren.io/), an awesome language written by
* Bob Nystrom, the author of "Crafting Interpreters", and its contributors.
* Reference:
* https://github.com/wren-lang/wren/blob/main/src/vm/wren_value.h
* https://leonardschuetz.ch/blog/nan-boxing/
*
* The previous implementation added a type field to every \ref var and used
* smart pointers (C++17) to objects with custom destructors, which made the
* program inefficient for small types such as null, bool, int and float.
*/
/** __STDC_LIMIT_MACROS (and __STDC_CONSTANT_MACROS) are a workaround that
* lets C++ programs use the stdint.h macros specified in the C99 standard
* that aren't in the C++ standard. Only __STDC_LIMIT_MACROS is defined here. */
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include "common.h"
2021-02-07 15:40:00 +08:00
#include "types/gen/byte_buffer.h"
#include "types/gen/function_buffer.h"
#include "types/gen/int_buffer.h"
#include "types/gen/var_buffer.h"
#include "types/name_table.h"
// A struct that ends in a variably-sized array declares that array with the
// size \ref DYNAMIC_TAIL_ARRAY, e.g. `char data[DYNAMIC_TAIL_ARRAY];`. Before
// C99 this was done with the technique known as the "struct hack".
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
/** C99 and newer: expands to nothing, so the member reads `arr[]`. */
#define DYNAMIC_TAIL_ARRAY
#else
/** Anything older: the member is declared as `arr[0]`. */
#define DYNAMIC_TAIL_ARRAY 0
#endif
// Maximum number of import statements in a single script. This is the fixed
// capacity of the `imports` array in struct Script.
#define MAX_IMPORT_SCRIPTS 16
/**
* The IEEE 754 double precision float bit representation.
*
* 1 Sign bit
* | 11 Exponent bits
* | |          52 Mantissa (i.e. fraction values) bits
* | |          |
* S[Exponent-][Mantissa------------------------------------------]
*
* If all bits of the exponent are set it's a NaN ("Not a Number") value.
*
*  v~~~~~~~~~~ NaN value
* -11111111111----------------------------------------------------
*
* We define our variant \ref var as an unsigned 64 bit integer (we treat it
* like a bit array). If the exponent bits are not all set, we just reinterpret
* it as an IEEE 754 double precision 64 bit number. Otherwise there are a lot
* of different combinations of bits we can use for our custom tagging; this
* method is called NaN-Tagging.
*
* There are two kinds of NaN values, "signalling" and "quiet". The first one is
* intended to halt the execution but the second one is to continue the
* execution quietly. We get the quiet NaN by setting the highest mantissa bit.
*
*             v~Highest mantissa bit
* -[NaN      ]1---------------------------------------------------
*
* If the sign bit is set, it's a heap allocated pointer.
* |             these 2 bits are type tags representing 4 different types
* |             vv
* S[NaN      ]1cXX------------------------------------------------
*              |  ^~~~~~~~ 48 bits to represent the value (51 for pointer)
*              '- if this (const) bit is set, it's a constant.
*
* On a 32-bit machine a pointer size is 32 and on a 64-bit machine actually 48
* bits are used for pointers. Ta-da, now we have double precision number,
* primitives, pointers all inside a 64 bit sequence and for numbers it doesn't
* require any bit mask operations, which means math on the var is now even
* faster.
*
* our custom 2 bits type tagging
* c00 : NULL
* c01 ... 0 : UNDEF (used in unused map keys)
* ... 1 : VOID (void function return void not null)
* ... 10 : FALSE
* ... 11 : TRUE
* c10 : INTEGER
* |
* '-- c is const bit.
*
*/
#if VAR_NAN_TAGGING
// Masks and payloads.
#define _MASK_SIGN ((uint64_t)0x8000000000000000)
#define _MASK_QNAN ((uint64_t)0x7ffc000000000000)
#define _MASK_TYPE ((uint64_t)0x0003000000000000)
#define _MASK_CONST ((uint64_t)0x0004000000000000)

// Tag bits that mark a value as an integer / as a heap object pointer.
#define _MASK_INTEGER (_MASK_QNAN | (uint64_t)0x0002000000000000)
#define _MASK_OBJECT (_MASK_QNAN | (uint64_t)0x8000000000000000)

// Bits that carry the value itself: low 32 bits for integers, low 48 bits
// for object pointers.
#define _PAYLOAD_INTEGER ((uint64_t)0x00000000ffffffff)
#define _PAYLOAD_OBJECT ((uint64_t)0x0000ffffffffffff)

// Primitive types.
#define VAR_NULL (_MASK_QNAN | (uint64_t)0x0000000000000000)
#define VAR_UNDEFINED (_MASK_QNAN | (uint64_t)0x0001000000000000)
#define VAR_VOID (_MASK_QNAN | (uint64_t)0x0001000000000001)
#define VAR_FALSE (_MASK_QNAN | (uint64_t)0x0001000000000002)
#define VAR_TRUE (_MASK_QNAN | (uint64_t)0x0001000000000003)

// Encode types.
#define VAR_BOOL(value) ((value)? VAR_TRUE : VAR_FALSE)
// The value is narrowed to 32 bits before it is stored in the payload.
#define VAR_INT(value) (_MASK_INTEGER | (uint32_t)(int32_t)(value))
#define VAR_NUM(value) (doubleToVar(value))
#define VAR_OBJ(value) ((Var)(_MASK_OBJECT | (uint64_t)(uintptr_t)(value)))

// Const casting.
#define ADD_CONST(value) ((value) | _MASK_CONST)
#define REMOVE_CONST(value) ((value) & ~_MASK_CONST)

// Check types. Every argument is parenthesized so that expressions such as
// `a | b` can be passed safely. Note that IS_BOOL evaluates its argument
// twice.
#define IS_CONST(value) (((value) & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value) ((value) == VAR_NULL)
#define IS_UNDEF(value) ((value) == VAR_UNDEFINED)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value) ((value) == VAR_TRUE)
#define IS_BOOL(value) (IS_TRUE(value) || IS_FALSE(value))
#define IS_INT(value) (((value) & _MASK_INTEGER) == _MASK_INTEGER)
#define IS_NUM(value) (((value) & _MASK_QNAN) != _MASK_QNAN)
#define IS_OBJ(value) (((value) & _MASK_OBJECT) == _MASK_OBJECT)

// Decode types. These do not check the tag; use the IS_* macros first.
#define AS_BOOL(value) ((value) == VAR_TRUE)
#define AS_INT(value) ((int32_t)((value) & _PAYLOAD_INTEGER))
#define AS_NUM(value) (varToDouble(value))
// Goes through uintptr_t (like VAR_OBJ) so the integer to pointer conversion
// is also well formed where pointers are narrower than 64 bits.
#define AS_OBJ(value) ((Object*)(uintptr_t)((value) & _PAYLOAD_OBJECT))
#define AS_STRING(value) ((String*)AS_OBJ(value))
#define AS_CSTRING(value) (AS_STRING(value)->data)
#define AS_ARRAY(value) ((List*)AS_OBJ(value))
#define AS_MAP(value) ((Map*)AS_OBJ(value))
#define AS_RANGE(value) ((Range*)AS_OBJ(value))
#else
// TODO: Union tagging implementation of all the above macros ignore macros
// starts with an underscore.
// Type tag of a \ref var when NaN-tagging is disabled.
typedef enum {
  VAR_UNDEFINED, //< Internal type for exceptions.
  VAR_NULL,      //< Null pointer type.
  VAR_BOOL,      //< Yin and yang of software.
  VAR_INT,       //< Only 32bit integers (for consistency with NaN-Tagging).
  VAR_FLOAT,     //< Floats are stored as (64bit) double.

  VAR_OBJECT,    //< Base type for all \ref var_Object types.
} VarType;
typedef struct {
2021-02-12 01:35:43 +08:00
VarType type;
union {
bool _bool;
int _int;
double _float;
Object* _obj;
};
2021-02-07 15:40:00 +08:00
} var;
#endif // VAR_NAN_TAGGING
// Type tag stored in the `type` field of struct Object.
typedef enum /* ObjectType */ {
  OBJ_STRING,
  OBJ_LIST,
  OBJ_MAP,
  OBJ_RANGE,

  OBJ_SCRIPT,
  OBJ_FUNC,
  OBJ_FIBER,
  OBJ_USER,   //< Last value; CHECK_MISSING_OBJ_TYPE compares against it.
} ObjectType;
// Guard for code that switches over every ObjectType. [count] is compared
// with OBJ_USER (the last ObjectType value): in DEBUG builds a mismatch makes
// the macro expand to the constant expression 1/0, which is intended to make
// the compiler reject the build, so adding a new object type forces every
// guarded switch to be revisited. Outside DEBUG builds it is an empty
// statement. [count] is parenthesized so any expression can be passed.
#if DEBUG
#define CHECK_MISSING_OBJ_TYPE(count) (1/ ((int)(!((count) ^ OBJ_USER))) )
#else
#define CHECK_MISSING_OBJ_TYPE(count) do {} while (false)
#endif
2021-02-07 15:40:00 +08:00
// Base struct for all heap allocated objects.
struct Object {
2021-02-12 01:35:43 +08:00
ObjectType type; //< Type of the object in \ref var_Object_Type.
//Class* is; //< The class the object IS. // No OOP in MS.
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
Object* next; //< Next object in the heap allocated link list.
2021-02-07 15:40:00 +08:00
};
struct String {
2021-02-12 01:35:43 +08:00
Object _super;
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
uint32_t length; //< Length of the string in \ref data.
uint32_t capacity; //< Size of allocated \ref data.
char data[DYNAMIC_TAIL_ARRAY];
2021-02-07 15:40:00 +08:00
};
2021-02-13 01:40:19 +08:00
struct List {
2021-02-12 01:35:43 +08:00
Object _super;
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
VarBuffer elements; //< Elements of the array.
2021-02-07 15:40:00 +08:00
};
// TODO: struct Map here.
struct Range {
2021-02-12 01:35:43 +08:00
Object _super;
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
double from; //< Beggining of the range inclusive.
double to; //< End of the range exclusive.
2021-02-07 15:40:00 +08:00
};
struct Script {
2021-02-12 01:35:43 +08:00
Object _super;
2021-02-07 15:40:00 +08:00
// One of the below is null and other one is not. Since "std" script names
// are hardcoded and user script names are constructed.
2021-02-16 02:51:00 +08:00
const char* name; //< Std script's name. Null for user script.
String* path; //< Absolute path of the script. Null for std scripts.
2021-02-08 02:30:29 +08:00
2021-02-12 01:35:43 +08:00
ID imports[MAX_IMPORT_SCRIPTS]; //< Imported script IDs.
int import_count; //< Number of import in imports.
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
VarBuffer globals; //< Script level global variables.
NameTable global_names; //< Name map to index in globals.
VarBuffer literals; //< Script literal constant values.
FunctionBuffer functions; //< Script level functions.
NameTable function_names; //< Name map to index in functions.
StringBuffer names; //< Name literals, attribute names, etc.
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
Function* body; //< Script body is an anonymous function.
2021-02-07 15:40:00 +08:00
};
// Script function pointer.
typedef struct {
2021-02-12 01:35:43 +08:00
ByteBuffer opcodes; //< Buffer of opcodes.
IntBuffer oplines; //< Line number of opcodes for debug (1 based).
int stack_size; //< Maximum size of stack required.
2021-02-07 15:40:00 +08:00
} Fn;
struct Function {
2021-02-12 01:35:43 +08:00
Object _super;
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
const char* name; //< Name in the script [owner].
Script* owner; //< Owner script of the function.
int arity; //< Number of argument the function expects.
2021-02-07 15:40:00 +08:00
2021-02-12 01:35:43 +08:00
bool is_native; //< True if Native function.
union {
MiniScriptNativeFn native; //< Native function pointer.
Fn* fn; //< Script function pointer.
};
2021-02-07 15:40:00 +08:00
};
// Methods ////////////////////////////////////////////////////////////////////
2021-02-07 15:40:00 +08:00
2021-02-08 02:30:29 +08:00
void varInitObject(Object* self, MSVM* vm, ObjectType type);
2021-02-07 15:40:00 +08:00
// Instead use VAR_NUM(value) and AS_NUM(value)
Var doubleToVar(double value);
double varToDouble(Var value);
// Allocate new String object and return String*.
2021-02-08 02:30:29 +08:00
String* newString(MSVM* vm, const char* text, uint32_t length);
2021-02-07 15:40:00 +08:00
2021-02-13 01:40:19 +08:00
// Allocate new List and return List*.
List* newList(MSVM* vm, uint32_t size);
// Allocate new Range object and return Range*.
Range* newRange(MSVM* vm, double from, double to);
2021-02-07 15:40:00 +08:00
// Allocate new Script object and return Script*.
2021-02-08 02:30:29 +08:00
Script* newScript(MSVM* vm);
2021-02-07 15:40:00 +08:00
// Allocate new Function object and return Function*. Parameter [name] should
// be the name in the Script's nametable.
2021-02-16 02:51:00 +08:00
Function* newFunction(MSVM* vm, const char* name, int length, Script* owner,
2021-02-09 16:21:10 +08:00
bool is_native);
// Utility functions //////////////////////////////////////////////////////////
2021-02-07 15:40:00 +08:00
2021-02-16 02:51:00 +08:00
// Returns the type name of the var [v].
const char* varTypeName(Var v);
2021-02-12 01:35:43 +08:00
// Returns true if both variables are the same.
2021-02-09 16:21:10 +08:00
bool isVauesSame(Var v1, Var v2);
2021-02-07 15:40:00 +08:00
2021-02-13 01:40:19 +08:00
// Returns the string version of the value. Note: pass false as [_recursive]
// It's for internal use (or may be I could make a wrapper around).
2021-02-13 01:40:19 +08:00
String* toString(MSVM* vm, Var v, bool _recursive);
2021-02-12 01:35:43 +08:00
2021-02-15 20:49:19 +08:00
// Returns the truthy value of the var.
bool toBool(Var v);
2021-02-07 15:40:00 +08:00
#endif // VAR_H