/* Mirror of https://github.com/zekexiao/pocketlang.git
 * (synced 2025-02-06 12:46:53 +08:00; 307 lines, 9.7 KiB, C). */
/*
|
|
* Copyright (c) 2020-2021 Thakee Nathees
|
|
* Licensed under: MIT License
|
|
*/
|
|
|
|
#ifndef VAR_H
|
|
#define VAR_H
|
|
|
|
/** @file
 * A simple single header dynamic type system library for small dynamically
 * typed languages, using a technique called NaN-tagging (optional). The method
 * is inspired by wren (https://wren.io/), an awesome language written by Bob
 * Nystrom, the author of "Crafting Interpreters", and its contributors.
 * Reference:
 *     https://github.com/wren-lang/wren/blob/main/src/vm/wren_value.h
 *     https://leonardschuetz.ch/blog/nan-boxing/
 *
 * The previous implementation added a type field to every \ref var and used
 * smart pointers (C++17) to objects with custom destructors, which made the
 * program inefficient for small types such as null, bool, int and float.
 */
|
|
|
|
/** __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are a workaround to
 * allow C++ programs to use stdint.h macros specified in the C99
 * standard that aren't in the C++ standard. Only __STDC_LIMIT_MACROS is
 * defined here. */
|
|
#define __STDC_LIMIT_MACROS
|
|
#include <stdint.h>
|
|
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
|
|
#include "miniscript.h"
|
|
|
|
#include "types/gen/byte_buffer.h"
|
|
#include "types/gen/function_buffer.h"
|
|
#include "types/gen/int_buffer.h"
|
|
#include "types/gen/var_buffer.h"
|
|
#include "types/name_table.h"
|
|
|
|
// A struct that ends in a variably-sized tail array declares that array with
// size \ref DYNAMIC_TAIL_ARRAY, e.g. `char data[DYNAMIC_TAIL_ARRAY];`. Before
// C99 this was done with the idiom known as the "struct hack".
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/** For std >= c99 it's just `arr[]` (a flexible array member). */
#define DYNAMIC_TAIL_ARRAY
#else
/** Otherwise (pre-C99, or __STDC_VERSION__ is not defined) it's `arr[0]`. */
#define DYNAMIC_TAIL_ARRAY 0
#endif
|
|
|
|
// Maximum number of import statements allowed in a single script. It is the
// capacity of the fixed-size `imports` array in struct Script.
#define MAX_IMPORT_SCRIPTS 16
|
|
|
|
/**
 * The IEEE 754 double precision float bit representation.
 *
 * 1 Sign bit
 * | 11 Exponent bits
 * | |          52 Mantissa (i.e. fraction values) bits
 * | |          |
 * S[Exponent-][Mantissa------------------------------------------]
 *
 * If all bits of the exponent are set it's a NaN ("Not a Number") value.
 *
 *  v~~~~~~~~~~ NaN value
 * -11111111111----------------------------------------------------
 *
 * We define our variant \ref var as an unsigned 64 bit integer (we treat it
 * like a bit array). If the exponent bits are not all set, we just reinterpret
 * it as an IEEE 754 double precision 64 bit number. Otherwise there are a lot
 * of different combinations of bits we can use for our custom tagging; this
 * method is called NaN-Tagging.
 *
 * There are two kinds of NaN values, "signalling" and "quiet". The first one
 * is intended to halt the execution but the second one is to continue the
 * execution quietly. We get the quiet NaN by setting the highest mantissa bit.
 *
 *             v~Highest mantissa bit
 * -[NaN      ]1---------------------------------------------------
 *
 * if sign bit set, it's a heap allocated pointer.
 * |             these 2 bits are type tags representing 4 different types
 * |             vv
 * S[NaN      ]1cXX------------------------------------------------
 *              |  ^~~~~~~~ 48 bits to represent the value (object pointers
 *              |           also use these 48 bits, see _PAYLOAD_OBJECT)
 *              '- if this (const) bit set, it's a constant.
 *
 * On a 32-bit machine a pointer size is 32 and on a 64-bit machine actually 48
 * bits are used for pointers. Ta-da, now we have double precision numbers,
 * primitives and pointers all inside a 64 bit sequence, and for numbers it
 * doesn't require any bit mask operations, which means math on the var is now
 * even faster.
 *
 * Our custom 2 bits type tagging:
 * c00        : NULL
 * c01 ... 0  : UNDEF  (used in unused map keys)
 *     ... 1  : VOID   (a void function returns void, not null)
 *     ... 10 : FALSE
 *     ... 11 : TRUE
 * c10        : INTEGER
 * |
 * '-- c is const bit.
 *
 */
|
|
|
|
#if VAR_NAN_TAGGING
|
|
|
|
// Masks and payloads.
//
// NOTE(review): _MASK_CONST (bit 50) lies inside _MASK_QNAN (0x7ffc... covers
// bits 50-62), so every NaN-tagged value already has the const bit set and
// REMOVE_CONST() clears a bit that IS_NUM() tests. This disagrees with the
// `S[NaN]1cXX` layout described in the comment above -- confirm the intended
// layout before relying on the const macros.
#define _MASK_SIGN  ((uint64_t)0x8000000000000000)
#define _MASK_QNAN  ((uint64_t)0x7ffc000000000000)
#define _MASK_TYPE  ((uint64_t)0x0003000000000000)
#define _MASK_CONST ((uint64_t)0x0004000000000000)

#define _MASK_INTEGER (_MASK_QNAN | (uint64_t)0x0002000000000000)
#define _MASK_OBJECT  (_MASK_QNAN | (uint64_t)0x8000000000000000)

#define _PAYLOAD_INTEGER ((uint64_t)0x00000000ffffffff)
#define _PAYLOAD_OBJECT  ((uint64_t)0x0000ffffffffffff)

// Primitive types. UNDEFINED, VOID, FALSE and TRUE share type tag 01 and are
// told apart by the lowest payload bits.
#define VAR_NULL      (_MASK_QNAN | (uint64_t)0x0000000000000000)
#define VAR_UNDEFINED (_MASK_QNAN | (uint64_t)0x0001000000000000)
#define VAR_VOID      (_MASK_QNAN | (uint64_t)0x0001000000000001)
#define VAR_FALSE     (_MASK_QNAN | (uint64_t)0x0001000000000002)
#define VAR_TRUE      (_MASK_QNAN | (uint64_t)0x0001000000000003)

// Encode types. VAR_INT keeps only the low 32 bits of [value].
#define VAR_BOOL(value) ((value) ? VAR_TRUE : VAR_FALSE)
#define VAR_INT(value)  (_MASK_INTEGER | (uint32_t)(int32_t)(value))
#define VAR_NUM(value)  (doubleToVar(value))
#define VAR_OBJ(value)  ((Var)(_MASK_OBJECT | (uint64_t)(uintptr_t)(value)))

// Const casting.
#define ADD_CONST(value)    ((value) | _MASK_CONST)
#define REMOVE_CONST(value) ((value) & ~_MASK_CONST)

// Check types. Every use of [value] is parenthesized so that an expression
// argument (e.g. `IS_INT(a | b)`) is masked as a whole.
#define IS_CONST(value) (((value) & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value)  ((value) == VAR_NULL)
#define IS_UNDEF(value) ((value) == VAR_UNDEFINED)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value)  ((value) == VAR_TRUE)
#define IS_BOOL(value)  (IS_TRUE(value) || IS_FALSE(value))
#define IS_INT(value)   (((value) & _MASK_INTEGER) == _MASK_INTEGER)
#define IS_NUM(value)   (((value) & _MASK_QNAN) != _MASK_QNAN)
#define IS_OBJ(value)   (((value) & _MASK_OBJECT) == _MASK_OBJECT)

// Decode types. These do not check the tag; test with the IS_* macros first.
#define AS_BOOL(value) ((value) == VAR_TRUE)
#define AS_INT(value)  ((int32_t)((value) & _PAYLOAD_INTEGER))
#define AS_NUM(value)  (varToDouble(value))
// Goes through uintptr_t (mirroring VAR_OBJ) so the integer-to-pointer cast
// has matching width on targets where pointers are narrower than 64 bits.
#define AS_OBJ(value)  ((Object*)(uintptr_t)((value) & _PAYLOAD_OBJECT))

#define AS_STRING(value)  ((String*)AS_OBJ(value))
#define AS_CSTRING(value) (AS_STRING(value)->data)
#define AS_ARRAY(value)   ((List*)AS_OBJ(value))
#define AS_MAP(value)     ((Map*)AS_OBJ(value))
#define AS_RANGE(value)   ((Range*)AS_OBJ(value))

// A NaN-tagged variable is just a 64 bit pattern.
typedef uint64_t Var;
|
|
|
|
#else
|
|
|
|
// TODO: Union tagging implementation of all the above macros (ignore the
// macros that start with an underscore).
|
|
|
|
|
|
// Type tag of a \ref var when NaN-tagging is disabled.
typedef enum {
  VAR_UNDEFINED, //< Internal type for exceptions.
  VAR_NULL,      //< Null pointer type.
  VAR_BOOL,      //< Yin and yang of software.
  VAR_INT,       //< Only 32bit integers (for consistency with NaN-tagging).
  VAR_FLOAT,     //< Floats are stored as (64bit) double.

  VAR_OBJECT,    //< Base type for all \ref var_Object types.
} VarType;
|
|
|
|
typedef struct {
|
|
VarType type;
|
|
union {
|
|
bool _bool;
|
|
int _int;
|
|
double _float;
|
|
Object* _obj;
|
|
};
|
|
} var;
|
|
|
|
#endif // VAR_NAN_TAGGING
|
|
|
|
// Kind of a heap allocated object; stored in the `type` field of
// struct Object.
typedef enum /* ObjectType */ {
  OBJ_STRING,
  OBJ_LIST,
  OBJ_MAP,
  OBJ_RANGE,

  OBJ_SCRIPT,
  OBJ_FUNC,

  OBJ_USER,
} ObjectType;
|
|
|
|
// Base struct for all heap allocated objects.
|
|
struct Object {
|
|
ObjectType type; //< Type of the object in \ref var_Object_Type.
|
|
//Class* is; //< The class the object IS. // No OOP in MS.
|
|
|
|
Object* next; //< Next object in the heap allocated link list.
|
|
};
|
|
|
|
struct String {
|
|
Object _super;
|
|
|
|
uint32_t length; //< Length of the string in \ref data.
|
|
uint32_t capacity; //< Size of allocated \ref data.
|
|
char data[DYNAMIC_TAIL_ARRAY];
|
|
};
|
|
|
|
struct List {
|
|
Object _super;
|
|
|
|
VarBuffer elements; //< Elements of the array.
|
|
};
|
|
|
|
// TODO: struct Map here.
|
|
|
|
struct Range {
|
|
Object _super;
|
|
|
|
double from; //< Beggining of the range inclusive.
|
|
double to; //< End of the range exclusive.
|
|
};
|
|
|
|
struct Script {
|
|
Object _super;
|
|
|
|
String* path; //< Absolute path of the script.
|
|
|
|
ID imports[MAX_IMPORT_SCRIPTS]; //< Imported script IDs.
|
|
int import_count; //< Number of import in imports.
|
|
|
|
VarBuffer globals; //< Script level global variables.
|
|
NameTable global_names; //< Name map to index in globals.
|
|
VarBuffer literals; //< Script literal constant values.
|
|
FunctionBuffer functions; //< Script level functions.
|
|
NameTable function_names; //< Name map to index in functions.
|
|
StringBuffer names; //< Name literals, attribute names, etc.
|
|
|
|
Function* body; //< Script body is an anonymous function.
|
|
};
|
|
|
|
// Script function pointer.
|
|
typedef struct {
|
|
ByteBuffer opcodes; //< Buffer of opcodes.
|
|
IntBuffer oplines; //< Line number of opcodes for debug (1 based).
|
|
int stack_size; //< Maximum size of stack required.
|
|
} Fn;
|
|
|
|
struct Function {
|
|
Object _super;
|
|
|
|
const char* name; //< Name in the script [owner].
|
|
Script* owner; //< Owner script of the function.
|
|
int arity; //< Number of argument the function expects.
|
|
|
|
bool is_native; //< True if Native function.
|
|
union {
|
|
MiniScriptNativeFn native; //< Native function pointer.
|
|
Fn* fn; //< Script function pointer.
|
|
};
|
|
};
|
|
|
|
// Methods.
|
|
|
|
void varInitObject(Object* self, MSVM* vm, ObjectType type);
|
|
|
|
// Instead use VAR_NUM(value) and AS_NUM(value)
|
|
Var doubleToVar(double value);
|
|
double varToDouble(Var value);
|
|
|
|
// Allocate new String object and return String*.
|
|
String* newString(MSVM* vm, const char* text, uint32_t length);
|
|
|
|
// Allocate new List and return List*.
|
|
List* newList(MSVM* vm, uint32_t size);
|
|
|
|
// Allocate new Range object and return Range*.
|
|
Range* newRange(MSVM* vm, double from, double to);
|
|
|
|
// Allocate new Script object and return Script*.
|
|
Script* newScript(MSVM* vm);
|
|
|
|
// Allocate new Function object and return Function*. Parameter [name] should
|
|
// be the name in the Script's nametable.
|
|
Function* newFunction(MSVM* vm, const char* name, Script* owner,
|
|
bool is_native);
|
|
|
|
// Utility functions //////////////////////////////////////////////////////////
|
|
|
|
// Returns true if both variables are the same.
|
|
bool isVauesSame(Var v1, Var v2);
|
|
|
|
// Returns the string version of the value. Note: pass false as [_recursive]
|
|
// It's an internal use (or may be I could make a wrapper around).
|
|
String* toString(MSVM* vm, Var v, bool _recursive);
|
|
|
|
#endif // VAR_H
|