pocketlang/src/var.h
2021-02-12 23:14:07 +05:30

307 lines
9.7 KiB
C

/*
* Copyright (c) 2020-2021 Thakee Nathees
* Licensed under: MIT License
*/
#ifndef VAR_H
#define VAR_H
/** @file
* A simple single header dynamic type system library for small dynamically
* typed languages using a technique called NaN-tagging (optional). The method
* is inspired by wren (https://wren.io/), an awesome language written by
* Bob Nystrom, the author of "Crafting Interpreters", and its contributors.
* Reference:
* https://github.com/wren-lang/wren/blob/main/src/vm/wren_value.h
* https://leonardschuetz.ch/blog/nan-boxing/
*
* The previous implementation was to add a type field to every \ref var
* and use smart pointers (C++17) to objects with custom destructors,
* which made the program inefficient for small types such as null, bool,
* int and float.
*/
/** __STDC_LIMIT_MACROS is a workaround that lets C++ programs use the
* stdint.h limit macros (INT32_MAX, UINT64_MAX, ...) specified in the C99
* standard that aren't in the C++ standard. Its sibling,
* __STDC_CONSTANT_MACROS, is not needed by this header and is not defined
* here. It must be defined before <stdint.h> is included, and it is guarded
* so that a build which already defines it (for example with
* -D__STDC_LIMIT_MACROS) doesn't get a macro redefinition diagnostic. */
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include "miniscript.h"
#include "types/gen/byte_buffer.h"
#include "types/gen/function_buffer.h"
#include "types/gen/int_buffer.h"
#include "types/gen/var_buffer.h"
#include "types/name_table.h"
// Size token for the trailing array of a variably sized struct. Declare the
// last member of the struct as `type name[DYNAMIC_TAIL_ARRAY]`.
//
// Compilers that don't announce C99 or later get the legacy "struct hack"
// spelling `name[0]`; for C99 and later the token expands to nothing, which
// gives `name[]`.
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
  /** pre-C99 (or __STDC_VERSION__ not defined): `arr[0]` */
  #define DYNAMIC_TAIL_ARRAY 0
#else
  /** std >= c99: `arr[]` */
  #define DYNAMIC_TAIL_ARRAY
#endif
// Maximum number of import statements in a single script. This is the fixed
// capacity of the `imports` array in struct Script.
#define MAX_IMPORT_SCRIPTS 16
/**
* The IEEE 754 double precision float bit representation.
*
* 1 Sign bit
* | 11 Exponent bits
* | | 52 Mantissa (i.e. fraction values) bits
* | | |
* S[Exponent-][Mantissa------------------------------------------]
*
* if all bits of the exponent are set it's a NaN ("Not a Number") value.
*
* v~~~~~~~~~~ NaN value
* -11111111111----------------------------------------------------
*
* We define our variant \ref var as an unsigned 64 bit integer (we treat it
* like a bit array). If the exponent bits are not all set, we just reinterpret
* it as an IEEE 754 double precision 64 bit number. Otherwise there are a lot
* of different combinations of bits we can use for our custom tagging; this
* method is called NaN-Tagging.
*
* There are two kinds of NaN values "signalling" and "quiet". The first one is
* intended to halt the execution but the second one is to continue the
* execution quietly. We get the quiet NaN by setting the highest mantissa bit.
*
* v~Highest mantissa bit
* -[NaN ]1---------------------------------------------------
*
* if sign bit set, it's a heap allocated pointer.
* | these 2 bits are type tags representing 4 different types
* | vv
* S[NaN ]1cXX------------------------------------------------
* | ^~~~~~~~ 48 bits to represent the value (51 for pointer)
* '- if this (const) bit set, it's a constant.
*
* On a 32-bit machine a pointer size is 32 and on a 64-bit machine actually 48
* bits are used for pointers. Ta-da, now we have double precision number,
* primitives, pointers all inside a 64 bit sequence and for numbers it doesn't
* require any bit mask operations, which means math on the var is now even
* faster.
*
* our custom 2 bits type tagging
* c00 : NULL
* c01 ... 0 : UNDEF (used in unused map keys)
* ... 1 : VOID (void function return void not null)
* ... 10 : FALSE
* ... 11 : TRUE
* c10 : INTEGER
* |
* '-- c is const bit.
*
*/
#if VAR_NAN_TAGGING
// Masks and payloads.
#define _MASK_SIGN ((uint64_t)0x8000000000000000)
#define _MASK_QNAN ((uint64_t)0x7ffc000000000000)
#define _MASK_TYPE ((uint64_t)0x0003000000000000)
#define _MASK_CONST ((uint64_t)0x0004000000000000)
#define _MASK_INTEGER (_MASK_QNAN | (uint64_t)0x0002000000000000)
#define _MASK_OBJECT (_MASK_QNAN | (uint64_t)0x8000000000000000)
#define _PAYLOAD_INTEGER ((uint64_t)0x00000000ffffffff)
#define _PAYLOAD_OBJECT ((uint64_t)0x0000ffffffffffff)
// Primitive types.
#define VAR_NULL (_MASK_QNAN | (uint64_t)0x0000000000000000)
#define VAR_UNDEFINED (_MASK_QNAN | (uint64_t)0x0001000000000000)
#define VAR_VOID (_MASK_QNAN | (uint64_t)0x0001000000000001)
#define VAR_FALSE (_MASK_QNAN | (uint64_t)0x0001000000000002)
#define VAR_TRUE (_MASK_QNAN | (uint64_t)0x0001000000000003)
// Encode types.
#define VAR_BOOL(value) ((value)? VAR_TRUE : VAR_FALSE)
#define VAR_INT(value) (_MASK_INTEGER | (uint32_t)(int32_t)(value))
#define VAR_NUM(value) (doubleToVar(value))
#define VAR_OBJ(value) ((Var)(_MASK_OBJECT | (uint64_t)(uintptr_t)(value)))
// Const casting.
#define ADD_CONST(value) ((value) | _MASK_CONST)
#define REMOVE_CONST(value) ((value) & ~_MASK_CONST)
// Check types.
#define IS_CONST(value) ((value & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value) ((value) == VAR_NULL)
#define IS_UNDEF(value) ((value) == VAR_UNDEF)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value) ((value) == VAR_TRUE)
#define IS_BOOL(value) (IS_TRUE(value) || IS_FALSE(value))
#define IS_INT(value) ((value & _MASK_INTEGER) == _MASK_INTEGER)
#define IS_NUM(value) ((value & _MASK_QNAN) != _MASK_QNAN)
#define IS_OBJ(value) ((value & _MASK_OBJECT) == _MASK_OBJECT)
// Decode types.
#define AS_BOOL(value) ((value) == VAR_TRUE)
#define AS_INT(value) ((int32_t)((value) & _PAYLOAD_INTEGER))
#define AS_NUM(value) (varToDouble(value))
#define AS_OBJ(value) ((Object*)(value & _PAYLOAD_OBJECT))
#define AS_STRING(value) ((String*)AS_OBJ(value))
#define AS_CSTRING(value) (AS_STRING(value)->data)
#define AS_ARRAY(value) ((List*)AS_OBJ(value))
#define AS_MAP(value) ((Map*)AS_OBJ(value))
#define AS_RANGE(value) ((Range*)AS_OBJ(value))
typedef uint64_t Var;
#else
// TODO: Union tagging implementation of all the above macros ignore macros
// starts with an underscore.
typedef enum {
VAR_UNDEFINED, //< Internal type for exceptions.
VAR_NULL, //< Null pointer type.
VAR_BOOL, //< Yin and yang of software.
VAR_INT, //< Only 32bit integers (to consistance with Nan-Tagging).
VAR_FLOAT, //< Floats are stored as (64bit) double.
VAR_OBJECT, //< Base type for all \ref var_Object types.
} VarType;
typedef struct {
VarType type;
union {
bool _bool;
int _int;
double _float;
Object* _obj;
};
} var;
#endif // VAR_NAN_TAGGING
// Kind tag of a heap allocated object; it is the type of the `type` field of
// struct Object. The values are consecutive and start at zero.
typedef enum /* ObjectType */ {
  OBJ_STRING = 0,
  OBJ_LIST   = 1,
  OBJ_MAP    = 2,
  OBJ_RANGE  = 3,
  OBJ_SCRIPT = 4,
  OBJ_FUNC   = 5,
  OBJ_USER   = 6,
} ObjectType;
// Base struct for all heap allocated objects. Every concrete object struct in
// this file (String, List, Range, Script, Function) embeds it as its first
// member, named `_super`.
struct Object {
ObjectType type; //< Which kind of object this is (see ObjectType).
// There is no class pointer here: no OOP in MS.
Object* next; //< Next object in the heap allocated linked list.
};
// String object. The characters live inline in the tail array `data`, so the
// struct is variably sized (see DYNAMIC_TAIL_ARRAY).
// NOTE(review): AS_CSTRING() hands `data` out as a C string, so it is
// presumably NUL terminated; whether `length` and `capacity` count that
// terminator isn't visible in this header -- confirm in newString().
struct String {
Object _super;
uint32_t length; //< Length of the string in \ref data.
uint32_t capacity; //< Size of allocated \ref data.
char data[DYNAMIC_TAIL_ARRAY];
};
// List object: an object header followed by a buffer of Var elements.
struct List {
Object _super;
VarBuffer elements; //< Elements of the list.
};
// TODO: struct Map here.
// Range object: the half open interval [from, to), with both bounds stored as
// doubles.
struct Range {
Object _super;
double from; //< Beginning of the range, inclusive.
double to; //< End of the range, exclusive.
};
// Script object: everything that belongs to one script -- its path, its
// imports, its globals, literals and functions (each with a name table that
// maps a name to an index), and its body.
struct Script {
Object _super;
String* path; //< Absolute path of the script.
ID imports[MAX_IMPORT_SCRIPTS]; //< Imported script IDs (fixed capacity).
int import_count; //< Number of entries used in imports.
VarBuffer globals; //< Script level global variables.
NameTable global_names; //< Name map to index in globals.
VarBuffer literals; //< Script literal constant values.
FunctionBuffer functions; //< Script level functions.
NameTable function_names; //< Name map to index in functions.
StringBuffer names; //< Name literals, attribute names, etc.
Function* body; //< Script body is an anonymous function.
};
// Compiled body of a script (non native) function. struct Function points to
// one of these through its `fn` member.
typedef struct {
ByteBuffer opcodes; //< Buffer of opcodes.
IntBuffer oplines; //< Line number of opcodes for debug (1 based).
int stack_size; //< Maximum size of stack required.
} Fn;
// Function object, either a native function or a script function.
// NOTE(review): `is_native` presumably tells which member of the anonymous
// union is valid (`native` when true, `fn` otherwise) -- confirm against
// newFunction() and the callers.
struct Function {
Object _super;
const char* name; //< Name in the script [owner].
Script* owner; //< Owner script of the function.
int arity; //< Number of arguments the function expects.
bool is_native; //< True if Native function.
union {
MiniScriptNativeFn native; //< Native function pointer.
Fn* fn; //< Script function pointer.
};
};
// Methods.

// Initialize the object header [self] as an object of [type] for the [vm].
// NOTE(review): presumably this also links [self] into the VM's object list
// through Object.next -- confirm in the implementation.
void varInitObject(Object* self, MSVM* vm, ObjectType type);
// Conversion between a C double and a Var. Don't call these directly, instead
// use VAR_NUM(value) and AS_NUM(value).
Var doubleToVar(double value);
double varToDouble(Var value);
// Allocate new String object and return String*.
// NOTE(review): presumably [length] bytes of [text] are copied into the new
// string -- confirm in the implementation.
String* newString(MSVM* vm, const char* text, uint32_t length);
// Allocate new List and return List*.
// NOTE(review): whether [size] is an initial element count or a reserved
// capacity isn't visible in this header.
List* newList(MSVM* vm, uint32_t size);
// Allocate new Range object with the bounds [from] and [to] and return Range*.
Range* newRange(MSVM* vm, double from, double to);
// Allocate new Script object and return Script*.
Script* newScript(MSVM* vm);
// Allocate new Function object and return Function*. Parameter [name] should
// be the name in the Script's nametable.
Function* newFunction(MSVM* vm, const char* name, Script* owner,
bool is_native);
// Utility functions //////////////////////////////////////////////////////////
// Returns true if both variables are the same.
// NOTE(review): the name is misspelled ("Vaues" instead of "Values"). It's
// left as it is here because renaming it would break the existing callers.
bool isVauesSame(Var v1, Var v2);
// Returns the string version of the value. Note: always pass false as
// [_recursive], that parameter is for internal use (a wrapper could hide it).
String* toString(MSVM* vm, Var v, bool _recursive);
#endif // VAR_H