From c77801daf317491a7309fde9526bd38d99220a07 Mon Sep 17 00:00:00 2001 From: Thakee Nathees Date: Sun, 7 Feb 2021 13:10:00 +0530 Subject: [PATCH] initial commit --- .gitignore | 55 +++ LICENSE | 21 + README.md | 1 + SConscript | 36 ++ SConstruct | 211 ++++++++ include/miniscript.h | 30 ++ src/common.h | 107 ++++ src/compiler.c | 945 ++++++++++++++++++++++++++++++++++++ src/compiler.h | 17 + src/types/.gitignore | 6 + src/types/buffer.template.c | 44 ++ src/types/buffer.template.h | 45 ++ src/types/buffergen.py | 101 ++++ src/types/name_table.c | 31 ++ src/types/name_table.h | 27 ++ src/utils.c | 144 ++++++ src/utils.h | 112 +++++ src/var.c | 96 ++++ src/var.h | 295 +++++++++++ src/vm.c | 34 ++ src/vm.h | 48 ++ test/clogger.h | 296 +++++++++++ test/main.c | 54 +++ 23 files changed, 2756 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 SConscript create mode 100644 SConstruct create mode 100644 include/miniscript.h create mode 100644 src/common.h create mode 100644 src/compiler.c create mode 100644 src/compiler.h create mode 100644 src/types/.gitignore create mode 100644 src/types/buffer.template.c create mode 100644 src/types/buffer.template.h create mode 100644 src/types/buffergen.py create mode 100644 src/types/name_table.c create mode 100644 src/types/name_table.h create mode 100644 src/utils.c create mode 100644 src/utils.h create mode 100644 src/var.c create mode 100644 src/var.h create mode 100644 src/vm.c create mode 100644 src/vm.h create mode 100644 test/clogger.h create mode 100644 test/main.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b3aca09 --- /dev/null +++ b/.gitignore @@ -0,0 +1,55 @@ + +# MiniScript ignore list +build/ +MiniScript/test/ +src/test/ +MiniScript/.import/ +release/ +debug/ +*__pycache__/ +.vs/ +.vscode/ + +config.json +run.bat +*.dblite +*.idb +*.pdb +*.sln +*.vcxproj +*.vcxproj.filters +*.vcxproj.user +*.obj.enc + +# Prerequisites +*.d + 
+# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..005bb78 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Thakee Nathees + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae2f33d --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# MiniScript \ No newline at end of file diff --git a/SConscript b/SConscript new file mode 100644 index 0000000..f5ac58a --- /dev/null +++ b/SConscript @@ -0,0 +1,36 @@ +Import('env') +import os + +env.PROJECT_NAME = "MiniScript" +env.RUN_TARGET = os.path.join(env['variant_dir'], 'bin/miniscript') + +## MiniScript source files +SOURCES = [ + Glob('src/*.c'), + Glob('src/types/*.c'), + Glob('src/types/gen/*.c'), +] + +CPPPATH = [ + 'include/', +] + +## Compile miniscript lib. +vm = env.Library( + target = 'bin/miniscript', + source = SOURCES, + CPPPATH = CPPPATH, +) + +## Test executable +test = env.Program( + target = 'bin/miniscript', + source = ['test/main.c'], + CPPPATH = CPPPATH, + LIBPATH = 'bin', + LIBS = 'miniscript', +) + +Requires(test, vm) + + diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000..dce0c4b --- /dev/null +++ b/SConstruct @@ -0,0 +1,211 @@ +#!python +import os, subprocess, sys + +def get_variant_dir(env): + ret = 'build/' + env['platform'] + '/' + env['target']; + if env['platform'] == 'windows': + return ret + '/' + env['bits'] + return ret + +opts = Variables([], ARGUMENTS) +## Define our options +opts.Add(EnumVariable('platform', "Compilation platform", '', ['', 'windows', 'x11', 'linux', 'osx'])) +opts.Add(EnumVariable('target', "Compilation target", 'debug', ['debug', 'release'])) +opts.Add(EnumVariable('bits', 'output program bits', '64', ['32', '64'])) +opts.Add(BoolVariable('use_llvm', "Use the LLVM / Clang compiler", False)) +opts.Add(BoolVariable('use_mingw', "Use Mingw compiler", False)) + +opts.Add(BoolVariable('vsproj', "make a visual studio project", False)) +opts.Add(BoolVariable('verbose', "use verbose build command", False)) + +opts.Add(BoolVariable('libs', "include unit tests in main", False)) + +## Setup the Environment +DefaultEnvironment(tools=[]) ## not using any tools +env = 
Environment() + +## Updates the environment with the option variables. +opts.Update(env) + +if env['use_llvm']: + env['CC'] = 'clang' + env['CXX'] = 'clang++' +elif env['use_mingw']: + env['tools'] = ['mingw'] + +## find platform +if env['platform'] == '': + if sys.platform == 'win32': + env['platform'] = 'windows' + elif sys.platform in ('x11', 'linux', 'linux2'): + env['platform'] = 'linux' + elif sys.platform == 'darwin': + env['platform'] = 'osx' + else: + print("platform(%s) not supported." % sys.platform) + quit() + +## For the reference: +## - CCFLAGS are compilation flags shared between C and C++ +## - CFLAGS are for C-specific compilation flags +## - CXXFLAGS are for C++-specific compilation flags +## - CPPFLAGS are for pre-processor flags +## - CPPDEFINES are for pre-processor defines +## - LINKFLAGS are for linking flags + +## Check our platform specifics +if env['platform'] == "osx": + env.Append(CXXFLAGS=['-std=c++17']) + if env['target'] == 'debug': + env.Append(CCFLAGS=['-g', '-O2', '-arch', 'x86_64']) + env.Append(LINKFLAGS=['-arch', 'x86_64']) + else: + env.Append(CCFLAGS=['-g', '-O3', '-arch', 'x86_64']) + env.Append(LINKFLAGS=['-arch', 'x86_64']) + +elif env['platform'] == 'x11': + env.Append(LIBS=['dl', 'pthread']) + env.Append(CXXFLAGS=['-std=c++17']) + if env['target'] == 'debug': + env.Append(CCFLAGS=['-fPIC', '-g3', '-Og']) + else: + env.Append(CCFLAGS=['-fPIC', '-g', '-O3']) + +elif env['platform'] == "windows": + env.Append(CXXFLAGS=['/std:c++17', '/bigobj']) + env.Append(CPPDEFINES=['_CRT_SECURE_NO_WARNINGS']) + env.Append(CPPDEFINES=['WIN32', '_WIN32', '_WINDOWS']) + env.Append(CCFLAGS=['-W3', '-GR', '/FS']) + env.Append(LINKFLAGS='-SUBSYSTEM:CONSOLE') + env.Append(LIBS=[]) + + if env['bits'] == '32': env['TARGET_ARCH'] = 'x86' + else: env['TARGET_ARCH'] = 'x86_64' + + if env['target'] == 'debug': + env.Append(CPPDEFINES=['DEBUG']) + env.Append(CCFLAGS=['-EHsc', '-MDd', '-ZI']) + env.Append(LINKFLAGS=['-DEBUG']) + else: + 
env.Append(CPPDEFINES=['NDEBUG']) + env.Append(CCFLAGS=['-O2', '-EHsc', '-MD']) + +## -------------------------------------------------------------------------------- + +## no_verbose function is from : https://github.com/godotengine/godot/blob/master/methods.py +def no_verbose(sys, env): + colors = {} + # Colors are disabled in non-TTY environments such as pipes. This means + # that if output is redirected to a file, it will not contain color codes + + colors["cyan"] = "\033[96m" if sys.stdout.isatty() else "" + colors["purple"] = "\033[95m" if sys.stdout.isatty() else "" + colors["blue"] = "\033[94m" if sys.stdout.isatty() else "" + colors["green"] = "\033[92m" if sys.stdout.isatty() else "" + colors["yellow"] = "\033[93m" if sys.stdout.isatty() else "" + colors["red"] = "\033[91m" if sys.stdout.isatty() else "" + colors["end"] = "\033[0m" if sys.stdout.isatty() else "" + + compile_source_message = "{}Compiling {}==> {}$SOURCE{}".format( + colors["blue"], colors["purple"], colors["yellow"], colors["end"] + ) + java_compile_source_message = "{}Compiling {}==> {}$SOURCE{}".format( + colors["blue"], colors["purple"], colors["yellow"], colors["end"] + ) + compile_shared_source_message = "{}Compiling shared {}==> {}$SOURCE{}".format( + colors["blue"], colors["purple"], colors["yellow"], colors["end"] + ) + link_program_message = "{}Linking Program {}==> {}$TARGET{}".format( + colors["red"], colors["purple"], colors["yellow"], colors["end"] + ) + link_library_message = "{}Linking Static Library {}==> {}$TARGET{}".format( + colors["red"], colors["purple"], colors["yellow"], colors["end"] + ) + ranlib_library_message = "{}Ranlib Library {}==> {}$TARGET{}".format( + colors["red"], colors["purple"], colors["yellow"], colors["end"] + ) + link_shared_library_message = "{}Linking Shared Library {}==> {}$TARGET{}".format( + colors["red"], colors["purple"], colors["yellow"], colors["end"] + ) + java_library_message = "{}Creating Java Archive {}==> {}$TARGET{}".format( + 
colors["red"], colors["purple"], colors["yellow"], colors["end"] + ) + env.Append(CXXCOMSTR=[compile_source_message]) + env.Append(CCCOMSTR=[compile_source_message]) + env.Append(SHCCCOMSTR=[compile_shared_source_message]) + env.Append(SHCXXCOMSTR=[compile_shared_source_message]) + env.Append(ARCOMSTR=[link_library_message]) + env.Append(RANLIBCOMSTR=[ranlib_library_message]) + env.Append(SHLINKCOMSTR=[link_shared_library_message]) + env.Append(LINKCOMSTR=[link_program_message]) + env.Append(JARCOMSTR=[java_library_message]) + env.Append(JAVACCOMSTR=[java_compile_source_message]) + +if not env['verbose']: + no_verbose(sys, env) + +Export('env') +env['variant_dir'] = get_variant_dir(env) +SConscript('SConscript', variant_dir=env['variant_dir'], duplicate=0) + +## -------------------------------------------------------------------------------- + +## visual studio targets +def get_vsproj_context(): + targets = [] + variants = [] ## ["debug|Win32", "debug|x64", "release|Win32", "release|x64"] + for target in 'debug', 'release': + for bits in '32', '64': + variants.append(target+'|'+('Win32' if bits=='32' else 'x64')) + targets.append(env.RUN_TARGET) + return variants, targets + + +def recursive_collect(dir, suffix): + ret = [] + for file in os.listdir(dir): + file = os.path.join(dir, file) + if os.path.isfile(file): + for suff in suffix: + if (file.endswith(suff)): + ret.append('$(ProjectDir)' + os.path.relpath(file)) + elif os.path.isdir(file): + ret += recursive_collect(file, suffix) + return ret + +def msvs_collect_header(): + return recursive_collect('.', ('.h', '.hpp')) + +def msvc_collect_sources(): + return recursive_collect('.', ('.c', '.cpp', '.cc', '.cxx')) + +def msvc_build_commandline(commands): + common_build_prefix = [ + 'cmd /V /C set "bits=64"', + '(if "$(PlatformTarget)"=="x86" (set "bits=32"))', + ] + return " ^& ".join(common_build_prefix + [commands]) + + +if env['vsproj']: + env["MSVSBUILDCOM"] = msvc_build_commandline( + "scons 
--directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows target=$(Configuration) bits=!bits!" + ) + env["MSVSREBUILDCOM"] = msvc_build_commandline( + "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows target=$(Configuration) bits=!bits! vsproj=yes" + ) + env["MSVSCLEANCOM"] = msvc_build_commandline( + "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" --clean platform=windows bits=!bits! target=$(Configuration)" + ) + variants, targets = get_vsproj_context() + env.MSVSProject(target = env.PROJECT_NAME + env['MSVSPROJECTSUFFIX'], + srcs = msvc_collect_sources(), + incs = msvs_collect_header(), + variant = variants, + runfile = targets, + buildtarget = targets, + ) + + +## Generates help for the -h scons option. +Help(opts.GenerateHelpText(env)) \ No newline at end of file diff --git a/include/miniscript.h b/include/miniscript.h new file mode 100644 index 0000000..3940e37 --- /dev/null +++ b/include/miniscript.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef MINISCRIPT_H +#define MINISCRIPT_H + +#include +#include + +// The version number macros. +#define MS_VERSION_MAJOR 0 +#define MS_VERSION_MINOR 1 +#define MS_VERSION_PATCH 0 + +// String representation of the value. +#define MS_VERSION_STRING "0.1.0" + +// MiniScript Virtual Machine. +// it'll contain the state of the execution, stack, heap, and manage memory +// allocations. +typedef struct VM VM; + +// C function pointer which is callable from MiniScript. +typedef void (*MiniScriptNativeFn)(VM* vm); + + + +#endif // MINISCRIPT_H diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..5d7fa6b --- /dev/null +++ b/src/common.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef MS_COMMON_H +#define MS_COMMON_H + +#include +#include +#include +#include +#include + +// miniscript visibility macros. 
// Define MS_DLL when miniscript is used as a shared library, and additionally
// define MS_COMPILE while building the library itself so that the public
// symbols are exported (without MS_COMPILE they are imported).

#ifdef _MSC_VER
  #define _MS_EXPORT __declspec(dllexport)
  #define _MS_IMPORT __declspec(dllimport)
#elif defined(__GNUC__)
  #define _MS_EXPORT __attribute__((visibility ("default")))
  #define _MS_IMPORT
#else
  #define _MS_EXPORT
  #define _MS_IMPORT
#endif

// MS_PUBLIC expands to the export attribute, the import attribute, or nothing
// depending on MS_DLL / MS_COMPILE.
#ifdef MS_DLL
  #ifdef MS_COMPILE
    #define MS_PUBLIC _MS_EXPORT
  #else
    #define MS_PUBLIC _MS_IMPORT
  #endif
#else
  #define MS_PUBLIC
#endif

// Unique number to identify for various cases.
typedef uint32_t ID;

// Nan-Tagging can be disabled for debugging/portability purposes.
// To disable it define `VAR_NAN_TAGGING 0`, otherwise it defaults to
// Nan-Tagging.
#ifndef VAR_NAN_TAGGING
  #define VAR_NAN_TAGGING 1
#endif

// With Nan-Tagging a Var is a plain 64 bit integer, otherwise it's a struct
// declared elsewhere.
#if VAR_NAN_TAGGING
typedef uint64_t Var;
#else
typedef struct Var Var;
#endif

typedef struct Object Object;
typedef struct String String;
typedef struct Array Array;
typedef struct Range Range;

typedef struct Script Script;
//typedef struct Class Class;
typedef struct Function Function;

#ifdef DEBUG

#include <stdio.h>

// Abort with [message] and the source location if [condition] is false.
#define ASSERT(condition, message)                                   \
  do {                                                               \
    if (!(condition)) {                                              \
      fprintf(stderr, "Assertion failed: %s\n\tat %s() (%s:%i)\n",   \
        message, __func__, __FILE__, __LINE__);                      \
      abort();                                                       \
    }                                                                \
  } while (false)

// Abort with the source location. The arguments must follow the order of the
// conversions in the format string: function (%s), file (%s), line (%i).
#define UNREACHABLE()                                                \
  do {                                                               \
    fprintf(stderr, "Execution reached an unreachable path\n"        \
      "\tat %s() (%s:%i)\n", __func__, __FILE__, __LINE__);          \
    abort();                                                         \
  } while (false)

#else

// Assertions compile to nothing in non debug builds; [condition] is not
// evaluated.
#define ASSERT(condition, message) do { } while (false)

// Reference : https://github.com/wren-lang/
#if defined( _MSC_VER )
  #define UNREACHABLE() __assume(0)
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
  #define UNREACHABLE() __builtin_unreachable()
#else
  #define UNREACHABLE()
#endif

#endif // DEBUG

// Allocate object of [type] using the vmRealloc function.
+#define ALLOCATE(vm, type) \ + ((type*)vmRealloc(vm, NULL, 0, sizeof(type))) + +// Allocate object of [type] which has a dynamic tail array of type [tail_type] +// with [count] entries. +#define ALLOCATE_DYNAMIC(vm, type, count, tail_type) \ + ((type*)vmRealloc(vm, NULL, 0, sizeof(type) + sizeof(tail_type) * (count))) + +#endif //MS_COMMON_H diff --git a/src/compiler.c b/src/compiler.c new file mode 100644 index 0000000..93edd2e --- /dev/null +++ b/src/compiler.c @@ -0,0 +1,945 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#include "compiler.h" + +#include "types/name_table.h" +#include "types/gen/byte_buffer.h" +#include "utils.h" +#include "vm.h" + +// The maximum number of variables (or global if compiling top level script) +// to lookup from the compiling context. Also it's limited by it's opcode +// which is using a single byte value to identify the local. +#define MAX_VARIABLES 256 + +typedef enum { + + TK_ERROR = 0, + TK_EOF, + TK_LINE, + + // symbols + TK_DOT, // . + TK_DOTDOT, // .. + TK_COMMA, // , + TK_COLLON, // : + TK_SEMICOLLON, // ; + TK_HASH, // # + TK_LPARAN, // ( + TK_RPARAN, // ) + TK_LBRACKET, // [ + TK_RBRACKET, // ] + TK_LBRACE, // { + TK_RBRACE, // } + TK_PERCENT, // % + + TK_TILD, // ~ + TK_AMP, // & + TK_PIPE, // | + TK_CARET, // ^ + + TK_PLUS, // + + TK_MINUS, // - + TK_STAR, // * + TK_FSLASH, // / + TK_BSLASH, // \. + TK_EQ, // = + TK_GT, // > + TK_LT, // < + //TK_BANG, // ! parsed as TK_NOT + + TK_EQEQ, // == + TK_NOTEQ, // != + TK_GTEQ, // >= + TK_LTEQ, // <= + + TK_PLUSEQ, // += + TK_MINUSEQ, // -= + TK_STAREQ, // *= + TK_DIVEQ, // /= + TK_SRIGHT, // >> + TK_SLEFT, // << + + //TODO: + // >>= <<= + //TK_PLUSPLUS, // ++ + //TK_MINUSMINUS, // -- + //TK_MODEQ, // %= + //TK_XOREQ, // ^= + + // Keywords. 
+ //TK_TYPE, // type + TK_IMPORT, // import + TK_ENUM, // enum + TK_DEF, // def + TK_NATIVE, // native (C function declaration) + TK_END, // end + + TK_NULL, // null + TK_SELF, // self + TK_IS, // is + TK_IN, // in + TK_AND, // and + TK_OR, // or + TK_NOT, // not + TK_TRUE, // true + TK_FALSE, // false + + // Type names for is test. + // TK_NULL already defined. + TK_BOOL_T, // Bool + TK_NUM_T, // Num + TK_STRING_T, // String + TK_ARRAY_T, // Array + TK_MAP_T, // Map + TK_RANGE_T, // Range + TK_FUNC_T, // Function + TK_OBJ_T, // Object (self, user data, etc.) + + TK_DO, // do + TK_WHILE, // while + TK_FOR, // for + TK_IF, // if + TK_ELIF, // elif + TK_ELSE, // else + TK_BREAK, // break + TK_CONTINUE, // continue + TK_RETURN, // return + + TK_NAME, // identifier + + TK_NUMBER, // number literal + TK_STRING, // string literal + + /* String interpolation (reference wren-lang) + * but it doesn't support recursive ex: "a \(b + "\(c)")" + * "a \(b) c \(d) e" + * tokenized as: + * TK_STR_INTERP "a " + * TK_NAME b + * TK_STR_INTERP " c " + * TK_NAME d + * TK_STRING " e" */ + // TK_STR_INTERP, //< not yet. + +} TokenType; + +typedef struct { + TokenType type; + + const char* start; //< Begining of the token in the source. + int length; //< Number of chars of the token. + int line; //< Line number of the token (1 based). + Var value; //< Literal value of the token. +} Token; + +typedef struct { + const char* identifier; + int length; + TokenType tk_type; +} _Keyword; + +// List of keywords mapped into their identifiers. 
+static _Keyword _keywords[] = { + //{ "type", 4, TK_TYPE }, + { "import", 6, TK_IMPORT }, + { "enum", 4, TK_ENUM }, + { "def", 3, TK_DEF }, + { "native", 6, TK_NATIVE }, + { "end", 3, TK_END }, + { "null", 4, TK_NULL }, + { "self", 4, TK_SELF }, + { "is", 2, TK_IS }, + { "in", 2, TK_IN }, + { "and", 3, TK_AND }, + { "or", 2, TK_OR }, + { "not", 3, TK_NOT }, + { "true", 4, TK_TRUE }, + { "false", 5, TK_FALSE }, + { "do", 2, TK_DO }, + { "while", 5, TK_WHILE }, + { "for", 3, TK_FOR }, + { "if", 2, TK_IF }, + { "elif", 4, TK_ELIF }, + { "else", 4, TK_ELSE }, + { "break", 5, TK_BREAK }, + { "continue", 8, TK_CONTINUE }, + { "return", 6, TK_RETURN }, + + // Type names. + { "Bool", 4, TK_BOOL_T }, + { "Num", 3, TK_NUM_T }, + { "String", 6, TK_STRING_T }, + { "Array", 5, TK_ARRAY_T }, + { "Map", 3, TK_MAP_T }, + { "Range", 5, TK_RANGE_T }, + { "Object", 6, TK_OBJ_T }, + { "Function", 8, TK_FUNC_T }, + + { NULL, (TokenType)(0) }, // Sentinal to mark the end of the array +}; + +typedef struct { + VM* vm; //< Owner of the parser (for reporting errors, etc). + + const char* source; //< Currently compiled source. + + const char* token_start; //< Start of the currently parsed token. + const char* current_char; //< Current char position in the source. + int current_line; //< Line number of the current char. + + Token previous, current, next; //< Currently parsed tokens. + + bool has_errors; //< True if any syntex error occured at compile time. +} Parser; + +// Compiler Types //////////////////////////////////////////////////////////// + +// Precedence parsing references: +// https://en.wikipedia.org/wiki/Shunting-yard_algorithm +// TODO: I should explicitly state wren-lang as a reference "globaly". 
+ +typedef enum { + PREC_NONE, + PREC_LOWEST, + PREC_ASSIGNMENT, // = + PREC_LOGICAL_OR, // or + PREC_LOGICAL_AND, // and + PREC_LOGICAL_NOT, // not + PREC_EQUALITY, // == != + PREC_IN, // in + PREC_IS, // is + PREC_COMPARISION, // < > <= >= + PREC_BITWISE_OR, // | + PREC_BITWISE_XOR, // ^ + PREC_BITWISE_AND, // & + PREC_BITWISE_SHIFT, // << >> + PREC_RANGE, // .. + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, // - ! ~ + PREC_CALL, // () + PREC_SUBSCRIPT, // [] + PREC_ATTRIB, // .index + PREC_PRIMARY, +} Precedence; + +typedef void (*GrammarFn)(Compiler* compiler, bool can_assign); + +typedef struct { + GrammarFn prefix; + GrammarFn infix; + Precedence precedence; +} GrammarRule; + +typedef struct { + const char* name; //< Directly points into the source string. + int length; //< Length of the name. + int depth; //< The depth the local is defined in. (-1 means global) +} Variable; + +typedef struct sLoop { + + // Index of the loop's start instruction where the execution will jump + // back to once it reach the loop end. + int start; + + // Index of the jump out address instruction to patch it's value once done + // compiling the loop. + int exit_jump; + + // Index of the first body instruction. Needed to start patching jump + // address from which till the loop end. + int body; + + // The outer loop of the current loop used to set and reset the compiler's + // current loop context. + struct sLoop* outer_loop; + +} Loop; + +struct Compiler { + + VM* vm; + Parser parser; + + // Current depth the compiler in (-1 means top level) 0 means function + // level and > 0 is inner scope. + int scope_depth; + + Variable variables[MAX_VARIABLES]; //< Variables in the current context. + int var_count; //< Number of locals in [variables]. + + // TODO: compiler should mark Script* below not to be garbage collected. + + Script* script; //< Current script. + Loop* loop; //< Current loop. + Function* fn; //< Current function. 
+}; + +/***************************************************************************** + * LEXING * + *****************************************************************************/ + +// Forward declaration of lexer methods. + +static char eatChar(Parser* parser); +static void setNextValueToken(Parser* parser, TokenType type, Var value); +static void setNextToken(Parser* parser, TokenType type); +static bool matchChar(Parser* parser, char c); +static bool matchLine(Parser* parser); + +static void eatString(Parser* parser) { + ByteBuffer buff; + byteBufferInit(&buff); + + while (true) { + char c = eatChar(parser); + + if (c == '"') break; + + if (c == '\0') { + // TODO: syntaxError() + + // Null byte is required by TK_EOF. + parser->current_char--; + break; + } + + if (c == '\\') { + switch (eatChar(parser)) { + case '"': byteBufferWrite(&buff, parser->vm, '"'); break; + case '\\': byteBufferWrite(&buff, parser->vm, '\\'); break; + case 'n': byteBufferWrite(&buff, parser->vm, '\n'); break; + case 'r': byteBufferWrite(&buff, parser->vm, '\r'); break; + case 't': byteBufferWrite(&buff, parser->vm, '\t'); break; + + default: + // TODO: syntaxError("Error: invalid escape character") + break; + } + } else { + byteBufferWrite(&buff, parser->vm, c); + } + } + + // '\0' will be added by varNewSring(); + Var string = VAR_OBJ(&newString(parser->vm, (const char*)buff.data, + (uint32_t)buff.count)->_super); + + byteBufferClear(&buff, parser->vm); + + setNextValueToken(parser, TK_STRING, string); +} + +// Returns the current char of the parser on. +static char peekChar(Parser* parser) { + return *parser->current_char; +} + +// Returns the next char of the parser on. +static char peekNextChar(Parser* parser) { + if (peekChar(parser) == '\0') return '\0'; + return *(parser->current_char + 1); +} + +// Advance the parser by 1 char. 
+static char eatChar(Parser* parser) { + char c = peekChar(parser); + parser->current_char++; + if (c == '\n') parser->current_line++; + return c; +} + +// Complete lexing an identifier name. +static void eatName(Parser* parser) { + + char c = peekChar(parser); + while (utilIsName(c) || utilIsDigit(c)) { + eatChar(parser); + c = peekChar(parser); + } + + const char* name_start = parser->token_start; + + TokenType type = TK_NAME; + + int length = (int)(parser->current_char - name_start); + for (int i = 0; _keywords[i].identifier != NULL; i++) { + if (_keywords[i].length == length && + strncmp(name_start, _keywords[i].identifier, length) == 0) { + type = _keywords[i].tk_type; + break; + } + } + + setNextToken(parser, type); +} + +// Complete lexing a number literal. +static void eatNumber(Parser* parser) { + + // TODO: hex, binary and scientific literals. + + while (utilIsDigit(peekChar(parser))) + eatChar(parser); + + if (matchChar(parser, '.')) { + while (utilIsDigit(peekChar(parser))) + eatChar(parser); + } + + Var value = VAR_NUM(strtod(parser->token_start, NULL)); + setNextValueToken(parser, TK_NUMBER, value); +} + +// Read and ignore chars till it reach new line or EOF. +static void skipLineComment(Parser* parser) { + char c = eatChar(parser); + + while (c != '\n' && c != '\0') { + c = eatChar(parser); + } +} + +// Will skip multiple new lines. +static void skipNewLines(Parser* parser) { + matchLine(parser); +} + +// If the current char is [c] consume it and advance char by 1 and returns +// true otherwise returns false. +static bool matchChar(Parser* parser, char c) { + if (peekChar(parser) != c) return false; + eatChar(parser); + return true; +} + +// If the current char is [c] eat the char and add token two otherwise eat +// append token one. 
+static void setNextTwoCharToken(Parser* parser, char c, TokenType one, + TokenType two) { + if (matchChar(parser, c)) { + setNextToken(parser, two); + } else { + setNextToken(parser, one); + } +} + +// Initialize the next token as the type. +static void setNextToken(Parser* parser, TokenType type) { + parser->next.type = type; + parser->next.start = parser->token_start; + parser->next.length = (int)(parser->current_char - parser->token_start); + parser->next.line = parser->current_line - ((type == TK_LINE) ? 1 : 0); +} + +// Initialize the next token as the type and assign the value. +static void setNextValueToken(Parser* parser, TokenType type, Var value) { + setNextToken(parser, type); + parser->next.value = value; +} + +// Lex the next token and set it as the next token. +static void lexToken(Parser* parser) { + parser->previous = parser->current; + parser->current = parser->next; + + if (parser->current.type == TK_EOF) return; + + while (peekChar(parser) != '\0') { + parser->token_start = parser->current_char; + char c = eatChar(parser); + + switch (c) { + case ',': setNextToken(parser, TK_COMMA); return; + case ':': setNextToken(parser, TK_COLLON); return; + case ';': setNextToken(parser, TK_SEMICOLLON); return; + case '#': setNextToken(parser, TK_HASH); return; + case '(': setNextToken(parser, TK_LPARAN); return; + case ')': setNextToken(parser, TK_RPARAN); return; + case '[': setNextToken(parser, TK_LBRACKET); return; + case ']': setNextToken(parser, TK_RBRACKET); return; + case '{': setNextToken(parser, TK_LBRACE); return; + case '}': setNextToken(parser, TK_RBRACE); return; + case '%': setNextToken(parser, TK_PERCENT); return; + + case '~': setNextToken(parser, TK_TILD); return; + case '&': setNextToken(parser, TK_AMP); return; + case '|': setNextToken(parser, TK_PIPE); return; + case '^': setNextToken(parser, TK_CARET); return; + + case '\n': setNextToken(parser, TK_LINE); return; + + case ' ': + case '\t': + case '\r': { + char c = peekChar(parser); + 
while (c == ' ' || c == '\t' || c == '\r') { + eatChar(parser); + c = peekChar(parser); + } + break; + } + + case '.': // TODO: ".5" should be a valid number. + setNextTwoCharToken(parser, '.', TK_DOT, TK_DOTDOT); + return; + + case '=': + setNextTwoCharToken(parser, '=', TK_EQ, TK_EQEQ); + return; + + case '!': + setNextTwoCharToken(parser, '=', TK_NOT, TK_NOTEQ); + return; + + case '>': + if (matchChar(parser, '>')) + setNextToken(parser, TK_SRIGHT); + else + setNextTwoCharToken(parser, '=', TK_GT, TK_GTEQ); + return; + + case '<': + if (matchChar(parser, '<')) + setNextToken(parser, TK_SLEFT); + else + setNextTwoCharToken(parser, '=', TK_LT, TK_LTEQ); + return; + + case '+': + setNextTwoCharToken(parser, '=', TK_PLUS, TK_PLUSEQ); + return; + + case '-': + setNextTwoCharToken(parser, '=', TK_MINUS, TK_MINUSEQ); + return; + + case '*': + setNextTwoCharToken(parser, '=', TK_STAR, TK_STAREQ); + return; + + case '/': + setNextTwoCharToken(parser, '=', TK_FSLASH, TK_DIVEQ); + return; + + case '"': eatString(parser); return; + + default: { + + if (utilIsDigit(c)) { + eatNumber(parser); + } else if (utilIsName(c)) { + eatName(parser); + } else { + if (c >= 32 && c <= 126) { + // TODO: syntaxError("Invalid character %c", c); + } else { + // TODO: syntaxError("Invalid byte 0x%x", (uint8_t)c); + } + setNextToken(parser, TK_ERROR); + } + return; + } + } + } + + setNextToken(parser, TK_EOF); + parser->next.start = parser->current_char; +} + +/***************************************************************************** + * PARSING * + *****************************************************************************/ + + // Initialize the parser. 
+static void parserInit(Parser* self, VM* vm, const char* source) { + self->vm = vm; + self->source = source; + self->token_start = source; + self->current_char = source; + self->current_line = 1; + self->has_errors = false; + + self->next.type = TK_ERROR; + self->next.start = NULL; + self->next.length = 0; + self->next.line = 1; + self->next.value = VAR_UNDEFINED; +} + +// Returns current token type. +static TokenType peek(Parser* self) { + return self->current.type; +} + +// Returns next token type. +static TokenType peekNext(Parser* self) { + return self->next.type; +} + +// Consume the current token if it's expected and lex for the next token +// and return true otherwise reutrn false. It'll skips all the new lines +// inbetween thus matching TK_LINE is invalid. +static bool match(Parser* self, TokenType expected) { + ASSERT(expected != TK_LINE, "Can't match TK_LINE."); + matchLine(self); + + if (peek(self) != expected) return false; + lexToken(self); + return true; +} + +// Match one or more lines and return true if there any. +static bool matchLine(Parser* parser) { + if (peek(parser) != TK_LINE) return false; + while (peek(parser) == TK_LINE) + lexToken(parser); + return true; +} + +// Match semi collon or multiple new lines. +static void matchEndStatement(Parser* parser) { + + // Semi collon must be on the same line. + if (peek(parser) == TK_SEMICOLLON) + match(parser, TK_SEMICOLLON); + + matchLine(parser); +} + +// Match optional "do" keyword and new lines. +static void matchStartBlock(Parser* parser) { + + // "do" must be on the same line. + if (peek(parser) == TK_DO) + match(parser, TK_DO); + + matchLine(parser); +} + +// Consume the the current token and if it's not [expected] emits error log +// and continue parsing for more error logs. It'll skips all the new lines +// inbetween thus matching TK_LINE is invald. 
+static void consume(Parser* self, TokenType expected, const char* err_msg) { + ASSERT(expected != TK_LINE, "Can't match TK_LINE."); + matchLine(self); + + lexToken(self); + if (self->previous.type != expected) { + // TODO: syntaxError(err_msg); + + // If the next token is expected discard the current to minimize + // cascaded errors and continue parsing. + if (peek(self) == expected) { + lexToken(self); + } + } +} + +/***************************************************************************** + * PARSING GRAMMAR * + *****************************************************************************/ + +// Forward declaration of grammar functions. + +static void exprAssignment(Compiler* compiler, bool can_assign); + +// Bool, Num, String, Null, -and- bool_t, Array_t, String_t, ... +static void exprLiteral(Compiler* compiler, bool can_assign); +static void exprName(Compiler* compiler, bool can_assign); + + +static void exprBinaryOp(Compiler* compiler, bool can_assign); +static void exprUnaryOp(Compiler* compiler, bool can_assign); + +static void exprGrouping(Compiler* compiler, bool can_assign); +static void exprArray(Compiler* compiler, bool can_assign); +static void exprMap(Compiler* compiler, bool can_assign); + +static void exprCall(Compiler* compiler, bool can_assign); +static void exprAttrib(Compiler* compiler, bool can_assign); +static void exprSubscript(Compiler* compiler, bool can_assign); + +#define NO_RULE { NULL, NULL, PREC_NONE } +#define NO_INFIX PREC_NONE + +GrammarRule rules[] = { // Prefix Infix Infix Precedence + /* TK_ERROR */ NO_RULE, + /* TK_EOF */ NO_RULE, + /* TK_LINE */ NO_RULE, + /* TK_DOT */ { exprAttrib, NULL, PREC_ATTRIB }, + /* TK_DOTDOT */ { NULL, exprBinaryOp, PREC_RANGE }, + /* TK_COMMA */ NO_RULE, + /* TK_COLLON */ NO_RULE, + /* TK_SEMICOLLON */ NO_RULE, + /* TK_HASH */ NO_RULE, + /* TK_LPARAN */ { exprGrouping, exprCall, PREC_CALL }, + /* TK_RPARAN */ NO_RULE, + /* TK_LBRACKET */ { exprArray, exprSubscript, PREC_SUBSCRIPT }, + /* 
TK_RBRACKET */ NO_RULE, + /* TK_LBRACE */ { exprMap, NULL, NO_INFIX }, + /* TK_RBRACE */ NO_RULE, + /* TK_PERCENT */ { NULL, exprBinaryOp, PREC_FACTOR }, + /* TK_TILD */ { exprUnaryOp, NULL, NO_INFIX }, + /* TK_AMP */ { NULL, exprBinaryOp, PREC_BITWISE_AND }, + /* TK_PIPE */ { NULL, exprBinaryOp, PREC_BITWISE_OR }, + /* TK_CARET */ { NULL, exprBinaryOp, PREC_BITWISE_XOR }, + /* TK_PLUS */ { NULL, exprBinaryOp, PREC_TERM }, + /* TK_MINUS */ { NULL, exprBinaryOp, PREC_TERM }, + /* TK_STAR */ { NULL, exprBinaryOp, PREC_FACTOR }, + /* TK_FSLASH */ { NULL, exprBinaryOp, PREC_FACTOR }, + /* TK_BSLASH */ NO_RULE, + /* TK_EQ */ { NULL, exprAssignment, PREC_ASSIGNMENT }, + /* TK_GT */ { NULL, exprBinaryOp, PREC_COMPARISION }, + /* TK_LT */ { NULL, exprBinaryOp, PREC_COMPARISION }, + /* TK_EQEQ */ { NULL, exprBinaryOp, PREC_EQUALITY }, + /* TK_NOTEQ */ { NULL, exprBinaryOp, PREC_EQUALITY }, + /* TK_GTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION }, + /* TK_LTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION }, + /* TK_PLUSEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT }, + /* TK_MINUSEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT }, + /* TK_STAREQ */ { NULL, exprAssignment, PREC_ASSIGNMENT }, + /* TK_DIVEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT }, + /* TK_SRIGHT */ { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }, + /* TK_SLEFT */ { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }, + /* TK_IMPORT */ NO_RULE, + /* TK_ENUM */ NO_RULE, + /* TK_DEF */ NO_RULE, + /* TK_EXTERN */ NO_RULE, + /* TK_END */ NO_RULE, + /* TK_NULL */ NO_RULE, + /* TK_SELF */ NO_RULE, + /* TK_IS */ { NULL, exprBinaryOp, PREC_IS }, + /* TK_IN */ { NULL, exprBinaryOp, PREC_IN }, + /* TK_AND */ { NULL, exprBinaryOp, PREC_LOGICAL_AND }, + /* TK_OR */ { NULL, exprBinaryOp, PREC_LOGICAL_OR }, + /* TK_NOT */ { NULL, exprUnaryOp, PREC_LOGICAL_NOT }, + /* TK_TRUE */ { exprLiteral, NULL, NO_INFIX }, + /* TK_FALSE */ { exprLiteral, NULL, NO_INFIX }, + /* TK_BOOL_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_NUM_T */ { exprLiteral, 
NULL, NO_INFIX }, + /* TK_STRING_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_ARRAY_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_MAP_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_RANGE_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_FUNC_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_OBJ_T */ { exprLiteral, NULL, NO_INFIX }, + /* TK_DO */ NO_RULE, + /* TK_WHILE */ NO_RULE, + /* TK_FOR */ NO_RULE, + /* TK_IF */ NO_RULE, + /* TK_ELIF */ NO_RULE, + /* TK_ELSE */ NO_RULE, + /* TK_BREAK */ NO_RULE, + /* TK_CONTINUE */ NO_RULE, + /* TK_RETURN */ NO_RULE, + /* TK_NAME */ { exprName, NULL, NO_INFIX }, + /* TK_NUMBER */ { exprLiteral, NULL, NO_INFIX }, + /* TK_STRING */ { exprLiteral, NULL, NO_INFIX }, +}; + +static GrammarRule* getRule(TokenType type) { + return &(rules[(int)type]); +} + +static void exprAssignment(Compiler* compiler, bool can_assign) { /*TODO*/ } + +static void exprLiteral(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprName(Compiler* compiler, bool can_assign) { /*TODO*/ } + + +static void exprBinaryOp(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprUnaryOp(Compiler* compiler, bool can_assign) { /*TODO*/ } + +static void exprGrouping(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprArray(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprMap(Compiler* compiler, bool can_assign) { /*TODO*/ } + +static void exprCall(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprAttrib(Compiler* compiler, bool can_assign) { /*TODO*/ } +static void exprSubscript(Compiler* compiler, bool can_assign) { /*TODO*/ } + + +/***************************************************************************** + * COMPILING * + *****************************************************************************/ + +// Used in searching for local variables. +typedef enum { + SCOPE_ANY = -3, + SCOPE_CURRENT, +} ScopeType; + +// Result type for an identifier definition. 
+typedef enum { + NAME_NOT_DEFINED, + NAME_LOCAL_VAR, //< Including parameter. + NAME_GLOBAL_VAR, + NAME_SCRIPT_FN, +} NameDefnType; + +// Identifier search result. +typedef struct { + + NameDefnType type; + + // Could be found in one of the imported script or in it's imported script + // recursively. If true [_extern] will be the script ID. + bool is_extern; + + // Extern script's ID. + ID _extern; + + union { + int local; + int global; + int func; + } index; + +} NameSearchResult; + +static void compilerInit(Compiler* compiler, VM* vm, const char* source) { + parserInit(&compiler->parser, vm, source); + compiler->vm = vm; + compiler->scope_depth = -1; + compiler->var_count = 0; + Loop* loop = NULL; + Function* fn = NULL; +} + +// Search for the name through compiler's variables. Returns -1 if not found. +static int compilerSearchVariables(Compiler* compiler, const char* name, + int length, ScopeType scope) { + + for (int i = 0; i < compiler->var_count; i++) { + Variable* variable = &compiler->variables[i]; + if (scope == SCOPE_CURRENT && + compiler->scope_depth != variable->depth) { + continue; + } + if (variable->length == length && + strncmp(variable->name, name, length) == 0) { + return i; + } + } + + return -1; +} + +// Will check if the name already defined. +static NameSearchResult compilerSearchName(Compiler* compiler, + const char* name, int length) { + // TODO: + NameSearchResult result; + result.type = NAME_NOT_DEFINED; + return result; +} + +// Add a variable and return it's index to the context. Assumes that the +// variable name is unique and not defined before in the current scope. 
+static int compilerAddVariable(Compiler* compiler, const char* name, + int length) { + Variable* variable = &compiler->variables[compiler->var_count]; + variable->name = name; + variable->length = length; + variable->depth = compiler->scope_depth; + return compiler->var_count++; +} + +static void compileFunction(Compiler* compiler, bool is_native) { + + Parser* parser = &compiler->parser; + + consume(&compiler->parser, TK_NAME, "Expected a function name."); + + const char* name_start = parser->previous.start; + int name_length = parser->previous.length; + NameSearchResult result = compilerSearchName(compiler, name_start, + name_length); + + if (result.type != NAME_NOT_DEFINED) { + // TODO: multiple definition error(); + } + + int index = nameTableAdd(&compiler->script->function_names, compiler->vm, + name_start, name_length); + + Function* func = newFunction(compiler->vm, nameTableGet( + &compiler->script->function_names, index), compiler->script, is_native); + + vmPushTempRef(compiler->vm, &func->_super); + functionBufferWrite(&compiler->script->functions, compiler->vm, func); + vmPopTempRef(compiler->vm); + + compiler->fn = func; + + consume(parser, TK_LPARAN, "Expected '(' after function name."); + + compiler->scope_depth++; // Parameter scope. + + // Compile parameter list. + while (match(parser, TK_NAME)) { + int predef = compilerSearchVariables(compiler, parser->previous.start, + parser->previous.length, SCOPE_CURRENT); + if (predef != -1) { + // TODO: error("Multiple definition of a parameter"); + } + match(parser, TK_COMMA); + } + + consume(parser, TK_RPARAN, "Expected ')' after parameters end."); + matchEndStatement(parser); + + if (is_native) { // Done here. + compiler->scope_depth--; // Parameter scope. + compiler->fn = NULL; + return; + } + + // TODO: Compile body. + + compiler->scope_depth--; // Parameter scope. + compiler->fn = NULL; +} + +Script* compileSource(VM* vm, const char* source) { + + // Skip utf8 BOM if there is any. 
+ if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + + Compiler compiler; + compilerInit(&compiler, vm, source); + + Script* script = newScript(vm); + compiler.script = script; + + // Parser pointer for quick access. + Parser* parser = &compiler.parser; + + // Lex initial tokens. current <-- next. + lexToken(parser); + lexToken(parser); + skipNewLines(parser); + + while (!match(parser, TK_EOF)) { + + if (match(parser, TK_NATIVE)) { + compileFunction(&compiler, true); + + } else if (match(parser, TK_DEF)) { + compileFunction(&compiler, false); + + } else if (match(parser, TK_IMPORT)) { + // TODO: + + } else { + // name = value # Variable defn. + // name() # statement + } + } +} diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 0000000..80b3188 --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef COMPILER_H +#define COMPILER_H + +#include "common.h" +#include "var.h" + +typedef struct Compiler Compiler; + +Script* compileSource(VM* vm, const char* source); + + +#endif // COMPILER_H diff --git a/src/types/.gitignore b/src/types/.gitignore new file mode 100644 index 0000000..555baf5 --- /dev/null +++ b/src/types/.gitignore @@ -0,0 +1,6 @@ + +## Ignore all generated source files. 
+*.gen.h +*.gen.c +gen/ +*.bat \ No newline at end of file diff --git a/src/types/buffer.template.c b/src/types/buffer.template.c new file mode 100644 index 0000000..9eeb88c --- /dev/null +++ b/src/types/buffer.template.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + + /** A template header to emulate C++ template and every occurence of + * $name$ will be replaced by the name of the buffer and $type$ will be + * replaced by the element type of the buffer (by a pre compile script) */ + +// Replace the following line with "$name$_buffer.h" +#include "buffer.template.h" +#include "../utils.h" +#include "../vm.h" + +void $name_l$BufferInit($name$Buffer* self) { + self->data = NULL; + self->count = 0; + self->capacity = 0; +} + +void $name_l$BufferClear($name$Buffer* self, VM* vm) { + vmRealloc(vm, self->data, self->capacity * sizeof($type$), 0); + self->data = NULL; + self->count = 0; + self->capacity = 0; +} + +void $name_l$BufferFill($name$Buffer* self, VM* vm, $type$ data, int count) { + + if (self->capacity < self->count + count) { + int capacity = utilPowerOf2Ceil((int)self->count + count); + self->data = ($type$*)vmRealloc(vm, self->data, + self->capacity * sizeof($type$), capacity * sizeof($type$)); + self->capacity = capacity; + } + + for (int i = 0; i < count; i++) { + self->data[self->count++] = data; + } +} + +void $name_l$BufferWrite($name$Buffer* self, VM* vm, $type$ data) { + $name_l$BufferFill(self, vm, data, 1); +} diff --git a/src/types/buffer.template.h b/src/types/buffer.template.h new file mode 100644 index 0000000..1c8a834 --- /dev/null +++ b/src/types/buffer.template.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +/** A template header to emulate C++ template and every occurence of + * $name$ will be replaced by the name of the buffer and $type$ will be + * replaced by the element type of the buffer (by a pre compile script) */ + +#ifndef 
$name_u$_BUFFER_H +#define $name_u$_BUFFER_H + +#include "../common.h" +#include "miniscript.h" + +// The factor by which the buffer will grow when it's capacity reached. +#define GROW_FACTOR 2 + +// The initial capacity of the buffer. +#define MIN_CAPACITY 16 + +// A place holder typedef to prevent IDE syntax errors. Remove this line +// when generating the source. +typedef uint8_t $type$; + +typedef struct { + $type$* data; + size_t count; + size_t capacity; +} $name$Buffer; + +// Initialize a new buffer int instance. +void $name_l$BufferInit($name$Buffer* self); + +// Clears the allocated elementes from the VM's realloc function. +void $name_l$BufferClear($name$Buffer* self, VM* vm); + +// Fill the buffer at the end of it with provided data if the capacity isn't +// enough using VM's realloc function. +void $name_l$BufferFill($name$Buffer* self, VM* vm, $type$ data, int count); + +// Write to the buffer with provided data at the end of the buffer. +void $name_l$BufferWrite($name$Buffer* self, VM* vm, $type$ data); + +#endif // $name_u$_BUFFER_H diff --git a/src/types/buffergen.py b/src/types/buffergen.py new file mode 100644 index 0000000..64b1836 --- /dev/null +++ b/src/types/buffergen.py @@ -0,0 +1,101 @@ +from pathlib import Path ## python 3.4 +import shutil +import os, sys + +## usage buffergen.py [--clean] + +SCRIPT_PATH = Path(os.path.realpath(__file__)) +ROOT = str(SCRIPT_PATH.parent) + +GEN_LIST = [ + ## name type + ('Int', 'int'), + ('Byte', 'uint8_t'), + ('Var', 'Var'), + ('String', 'String*'), + ('Function', 'Function*'), +] + +def log(msg): + print('[buffergen.py]', msg) + +def gen(): + cwd = os.getcwd() + os.chdir(ROOT) + _gen() + os.chdir(cwd) + return 0 + +def clean(): + cwd = os.getcwd() + os.chdir(ROOT) + _clean() + os.chdir(cwd) + return 0 + +def _replace(text, _data): + text = text.replace('$name$', _data[0]) + text = text.replace('$name_l$', _data[0].lower()) + text = text.replace('$name_u$', _data[0].upper()) + text = text.replace('$type$', 
_data[1]) + + ## Fix relative imports. + text = text.replace('../vm.h', '../../vm.h') + text = text.replace('../utils.h', '../../utils.h') + text = text.replace('../common.h', '../../common.h') + + return text + +def _gen(): + + header = '' + source = '' + with open('buffer.template.h', 'r') as f: + header = f.read() + with open('buffer.template.c', 'r') as f: + source = f.read() + + for _data in GEN_LIST: + _header = header.replace('''\ +// A place holder typedef to prevent IDE syntax errors. Remove this line +// when generating the source. +typedef uint8_t $type$; +''', '') + _header = _replace(_header, _data) + + _source = source.replace('''\ +// Replace the following line with "$name$_buffer.h" +#include "buffer.template.h"''', '#include "%s_buffer.h"' % _data[0].lower()) + _source = _replace(_source, _data) + + if not os.path.exists('gen/'): + os.mkdir('gen/') + + with open('gen/' + _data[0].lower() + '_buffer.h', 'w') as f: + f.write(_header) + log(_data[0].lower() + '_buffer.h' + ' generated' ) + with open('gen/' + _data[0].lower() + '_buffer.c', 'w') as f: + f.write(_source) + log(_data[0].lower() + '_buffer.c' + ' generated' ) + + +def _clean(): + shutil.rmtree('gen/') + log("Buffer source files removed") + + +def error_exit(msg): + print("Error: %s\n\tusage buffergen.py [--clean]" % msg) + exit(1) + +if __name__ == '__main__': + if len(sys.argv) > 2: + error_exit("invalid arg count") + if len(sys.argv) == 2: + if sys.argv[1] == '--clean': + _clean() + else: + error_exit("unknown argument") + else: + _gen() + exit(0) \ No newline at end of file diff --git a/src/types/name_table.c b/src/types/name_table.c new file mode 100644 index 0000000..09f9af9 --- /dev/null +++ b/src/types/name_table.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#include "name_table.h" +#include "../var.h" +#include "../vm.h" + +void nameTableInit(NameTable* self) { + stringBufferInit(self); +} + +void nameTableClear(NameTable* 
self, VM* vm) { + stringBufferClear(self, vm); +} + +int nameTableAdd(NameTable* self, VM* vm, const char* name, size_t length) { + String* string = newString(vm, name, (uint32_t)length); + + vmPushTempRef(vm, &string->_super); + stringBufferWrite(self, vm, string); + vmPopTempRef(vm); + + return (int)(self->count - 1); +} + +const char* nameTableGet(NameTable* self, int index) { + ASSERT(0 <= index && index < self->count, "Index out of bounds."); + return self->data[index]->data; +} diff --git a/src/types/name_table.h b/src/types/name_table.h new file mode 100644 index 0000000..99d6537 --- /dev/null +++ b/src/types/name_table.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef SYMBOL_TABLE_H +#define SYMBOL_TABLE_H + +// Symbol table maps the names to it's member indecies in the VarBuffer. +#include "gen/string_buffer.h" + +// TODO: Change this to use Map. +typedef StringBuffer NameTable; + +// Initialize the symbol table. +void nameTableInit(NameTable* self); + +// Remove the elements of the symbol table. +void nameTableClear(NameTable* self, VM* vm); + +// Add a name to the name table and return the index of the name in the table. +int nameTableAdd(NameTable* self, VM* vm, const char* name, size_t length); + +// Return name at index. 
+const char* nameTableGet(NameTable* self, int index); + +#endif // SYMBOL_TABLE_H diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..e4381b5 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2021 Thakee Nathees + * Licensed under: MIT License + */ + +#include "utils.h" + +int utilPowerOf2Ceil(int n) { + n--; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n++; + + return n; +} + +bool utilIsName(char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_'); +} + +bool utilIsDigit(char c) { + return ('0' <= c && c <= '9'); +} + +/**************************************************************************** + * UTF8 * + ****************************************************************************/ + +#define B1(first) 0b##first +#define B2(first, last) 0b##first##last +#define B3(first, second, last) 0b##first##second##last +#define B4(first, second, third, last) 0b##first##second##third##last + +int utf8_encodeBytesCount(int value) { + if (value <= 0x7f) return 1; + if (value <= 0x7ff) return 2; + if (value <= 0xffff) return 3; + if (value <= 0x10ffff) return 4; + + // if we're here means it's an invalid leading byte + return 0; +} + +int utf8_decodeBytesCount(uint8_t byte) { + + if ((byte >> 7) == 0b0) return 1; + if ((byte >> 6) == 0b10) return 1; //< continuation byte + if ((byte >> 5) == 0b110) return 2; + if ((byte >> 4) == 0b1110) return 3; + if ((byte >> 3) == 0b11110) return 4; + + // if we're here means it's an invalid utf8 byte + return 1; +} + +int utf8_encodeValue(int value, uint8_t* bytes) { + + if (value <= 0x7f) { + *bytes = value & 0x7f; + return 1; + } + + // 2 byte character 110xxxxx 10xxxxxx -> last 6 bits write to 2nd byte and + // first 5 bit write to first byte + if (value <= 0x7ff) { + *(bytes++) = B2(110, 00000) | ((value & B2(11111, 000000)) >> 6); + *(bytes) = B2(10, 000000) | ((value & B1(111111))); + return 2; + } + + // 3 byte character 
1110xxxx 10xxxxxx 10xxxxxx -> from last, 6 bits write + // to 3rd byte, next 6 bits write to 2nd byte, and 4 bits to first byte. + if (value <= 0xffff) { + *(bytes++) = B2(1110, 0000) | ((value & B3(1111, 000000, 000000)) >> 12); + *(bytes++) = B2(10, 000000) | ((value & B2(111111, 000000)) >> 6); + *(bytes) = B2(10, 000000) | ((value & B1(111111))); + return 3; + } + + // 4 byte character 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> last 6 bits to + // to 4th byte, next 6 bits to 3rd byte, next 6 bits to 2nd byte, 3 bits + // first byte. + if (value <= 0x10ffff) { + *(bytes++) = B2(11110, 000) | ((value & B4(111, 000000, 000000, 000000)) >> 18); + *(bytes++) = B2(10, 000000) | ((value & B3(111111, 000000, 000000)) >> 12); + *(bytes++) = B2(10, 000000) | ((value & B2(111111, 000000)) >> 6); + *(bytes) = B2(10, 000000) | ((value & B1(111111))); + return 4; + } + + return 0; +} + +int utf8_decodeBytes(uint8_t* bytes, int* value) { + + int continue_bytes = 0; + int byte_count = 1; + int _value = 0; + + if ((*bytes & B2(11, 000000)) == B2(10, 000000)) { + *value = *bytes; + return byte_count; + } + + else if ((*bytes & B2(111, 00000)) == B2(110, 00000)) { + continue_bytes = 1; + _value = (*bytes & B1(11111)); + } + + else if ((*bytes & B2(1111, 0000)) == B2(1110, 0000)) { + continue_bytes = 2; + _value = (*bytes & B1(1111)); + } + + else if ((*bytes & B2(11111, 000)) == B2(11110, 000)) { + continue_bytes = 3; + _value = (*bytes & B1(111)); + } + + else { + // Invalid leading byte + return -1; + } + + // now add the continuation bytes to the _value + while (continue_bytes--) { + bytes++, byte_count++; + + if ((*bytes & B2(11, 000000)) != B2(10, 000000)) return -1; + + _value = (_value << 6) | (*bytes & B2(00, 111111)); + } + + *value = _value; + return byte_count; +} + +#undef B1 +#undef B2 +#undef B3 +#undef B4 diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..0c143fa --- /dev/null +++ b/src/utils.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020-2021 
Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef UTILS_H +#define UTILS_H + +#include "common.h" + +// Returns the smallest power of two that is equal to or greater than [n]. +// Copied from : https://github.com/wren-lang/wren/blob/main/src/vm/wren_utils.h#L119 +// Reference : http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2Float +int utilPowerOf2Ceil(int n); + +// Returns true if `c` is [A-Za-z_]. +bool utilIsName(char c); + +// Returns true if `c` is [0-9]. +bool utilIsDigit(char c); + +#endif // UTILS_H + + +/**************************************************************************** + * UTF8 * + ****************************************************************************/ + + +#ifndef UTF8_H +#define UTF8_H + +/** @file + * A tiny UTF-8 utility library. + * + * + * Utf-8 is an elegant character encoding which I just love for its simplicity + * and compatibility. It's just a wonderful hack of all time. A single byte + * length utf-8 character is the same as an ASCII character. In case if you + * don't know about ASCII encoding it's just how a character is represented in + * a single byte. For an example the character 'A' is 01000001, 'B' is 01000010 + * and so on. The first bit in is always 0 called parity bit, it's a way to + * check if some of the bits have flipped by noise back in the old age of + * computers. Parity bit should be equal to the sum of the rest of the bits mod + * 2. So we have 7 bits to represent ASCII which is 128 different characters. + * But utf-8 can potentially encode 2,164,864 characters. + * + * The length of a utf-8 character would vary from 1 to 4. If it's a single + * byte character, it starts with a 0 and the rest of the 7 bits have the + * value. It's not just like ASCII, it is ASCII (compatible). For the 2 bytes + * character the first byte starts with 110....., for the 3 bytes character + * it starts with 1110.... and for the 4 byte it's 11110... 
The first byte + * is called the leading byte and the rest of the bytes of the character is + * called continuation bytes. + * + *
+ * example:
+ *                  v-- leading byte   v-- continuation byte => 2 bytes
+ *             é =  11000011           10101001
+ *                  ^^^                ^^
+ *                  110 means 2 bytes  10 means continuation
+ *
+ * (note that the character é is 8 bit long with ANSI encoding)
+ * 
+ * + * USAGE: + * // define implementation only in a single *.c source file like this + * #define UTF8_IMPLEMENT + * #include "utf8.h" +*/ + +#include + +// Returns the number of bytes the [value] would take to encode. returns 0 +// if the value is invalid utf8 representation. +// +//
+// For single byte character, represented as 0xxxxxxx
+// the payload is 7 bits so the maximum value would be 0x7f
+//
+// For 2 bytes characters, represented as 110xxxxx 10xxxxxx
+// the payload is 11 bits               | xxx xxxx xxxx |
+// so the maximum value would be 0x7ff  |  7   f    f   |
+//
+// For 3 bytes character, represented as 1110xxxx 10xxxxxx 10xxxxxx
+// the payload is 16 bits               | xxxx xxxx xxxx xxxx |
+// so the maximum value would be 0xffff | f    f    f    f    |
+//
+// For 4 bytes character, represented as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+// the payload is 21 bits                     | x xxxx xxxx xxxx xxxx xxxx |
+// so the maximum value *SHOULD* be 0x1fffff  | 1 f    f    f    f    f    |
+// but in RFC3629 §3 (https://tools.ietf.org/html/rfc3629#section-3) UTF-8 is
+// limited to 0x10FFFF to match the limits of UTF-16.
+// 
+int utf8_encodeBytesCount(int value); + +// Returns the number of bytes the the leading [byte] contains. returns 1 if +// the byte is an invalid utf8 leading byte (to skip pass to the next byte). +int utf8_decodeBytesCount(uint8_t byte); + +// Encodes the 32 bit value into a byte array which should be a size of 4 and +// returns the number of bytes the value encoded (if invalid returns 0, that +// how many it write to the buffer. +int utf8_encodeValue(int value, uint8_t* bytes); + +// Decodes from the leading [byte] and write the value to param [value] and +// returns the number of bytes the value decoded, if invalid write -1 to the +// value. +int utf8_decodeBytes(uint8_t* bytes, int* value); + + +#endif // UTF8_H \ No newline at end of file diff --git a/src/var.c b/src/var.c new file mode 100644 index 0000000..5a18ca6 --- /dev/null +++ b/src/var.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020-2021 Thakee Nathees + * Licensed under: MIT License + */ + +#include "var.h" +#include "vm.h" + +void varInitObject(Object* self, VM* vm, ObjectType type) { + self->type = type; + self->next = vm->first; + vm->first = self; + // TODO: set isGray = false; +} + +#if VAR_NAN_TAGGING +// A union to reinterpret a double as raw bits and back. 
+typedef union { + uint64_t bits64; + uint32_t bits32[2]; + double num; +} _DoubleBitsConv; +#endif + +Var doubleToVar(double value) { +#if VAR_NAN_TAGGING + _DoubleBitsConv bits; + bits.num = value; + return bits.bits64; +#else + // TODO: +#endif // VAR_NAN_TAGGING +} + +static inline double varToDouble(Var value) { +#if VAR_NAN_TAGGING + _DoubleBitsConv bits; + bits.bits64 = value; + return bits.num; +#else + // TODO: +#endif // VAR_NAN_TAGGING +} + +String* newString(VM* vm, const char* text, uint32_t length) { + + ASSERT(length == 0 || text != NULL, "Unexpected NULL string."); + + String* string = ALLOCATE_DYNAMIC(vm, String, length + 1, char); + varInitObject(&string->_super, vm, OBJ_STRING); + string->length = length; + + if (length != 0) memcpy(string->data, text, length); + string->data[length] = '\0'; + return string; +} + +Script* newScript(VM* vm) { + Script* script = ALLOCATE(vm, Script); + varInitObject(&script->_super, vm, OBJ_SCRIPT); + + varBufferInit(&script->globals); + nameTableInit(&script->global_names); + + functionBufferInit(&script->functions); + nameTableInit(&script->function_names); + + return script; +} + +Function* newFunction(VM* vm, const char* name, Script* owner, + bool is_native) { + + Function* func = ALLOCATE(vm, Function); + varInitObject(&func->_super, vm, OBJ_FUNC); + + func->name = name; + func->owner = owner; + func->arity = -1; + + func->is_native = is_native; + + if (is_native) { + func->native = NULL; + } else { + vmPushTempRef(vm, &func->_super); + Fn* fn = ALLOCATE(vm, Fn); + vmPopTempRef(vm); + + byteBufferInit(&fn->opcodes); + intBufferInit(&fn->oplines); + fn->stack_size = 0; + func->fn = fn; + } + return func; +} diff --git a/src/var.h b/src/var.h new file mode 100644 index 0000000..c8b4b7a --- /dev/null +++ b/src/var.h @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2020-2021 Thakee Nathees + * Licensed under: MIT License + */ + +#ifndef VAR_H +#define VAR_H + +/** @file + * A simple single header dynamic type system 
library for small dynamic typed + * languages using a technique called NaN-tagging (optional). The method is + * inspired from the wren (https://wren.io/) an awsome language written by the + * author of "Crafting Interpreters" Bob Nystrom and it's contrbuters. + * Reference: + * https://github.com/wren-lang/wren/blob/main/src/vm/wren_value.h + * https://leonardschuetz.ch/blog/nan-boxing/ + * + * The previous implementation was to add a type field to every \ref var + * and use smart pointers(C++17) to object with custom destructors, + * which makes the programme in effect for small types such null, bool, + * int and float. + */ + +/** __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are a workaround to + * allow C++ programs to use stdint.h macros specified in the C99 + * standard that aren't in the C++ standard */ +#define __STDC_LIMIT_MACROS +#include + +#include +#include + +#include "types/gen/byte_buffer.h" +#include "types/gen/function_buffer.h" +#include "types/gen/int_buffer.h" +#include "types/gen/var_buffer.h" +#include "types/name_table.h" + +// To use dynamic variably-sized struct with a tail array add an array at the +// end of the struct with size \ref DYNAMIC_TAIL_ARRAY. This method was a +// legacy standard called "struct hack". +#if __STDC_VERSION__ >= 199901L + /** for std >= c99 it's just `arr[]` */ + #define DYNAMIC_TAIL_ARRAY +#else + #define DYNAMIC_TAIL_ARRAY 0 +#endif + +// Number of maximum import statements in a script. +#define MAX_IMPORT_SCRIPTS 16 + +/** + * The IEEE 754 double precision float bit representation. + * + * 1 Sign bit + * | 11 Exponent bits + * | | 52 Mantissa (i.e. fraction values) bits + * | | | + * S[Exponent-][Mantissa------------------------------------------] + * + * if all bits of the exponent are set it's a NaN ("Not a Number") value. 
+ * + * v~~~~~~~~~~ NaN value + * -11111111111---------------------------------------------------- + * + * We define a our variant \ref var as an unsigned 64 bit integer (we treat it + * like a bit array) if the exponent bits were not set, just reinterprit it as + * a IEEE 754 double precision 64 bit number. Other wise we there are a lot of + * different combination of bits we can use for our custom tagging, this method + * is called NaN-Tagging. + * + * There are two kinds of NaN values "signalling" and "quiet". The first one is + * intended to halt the execution but the second one is to continue the + * execution quietly. We get the quiet NaN by setting the highest mentissa bit. + * + * v~Highest mestissa bit + * -[NaN ]1--------------------------------------------------- + * + * if sign bit set, it's a heap allocated pointer. + * | these 2 bits are type tags representing 8 different types + * | vv + * S[NaN ]1cXX------------------------------------------------ + * | ^~~~~~~~ 48 bits to represent the value (51 for pointer) + * '- if this (const) bit set, it's a constant. + * + * On a 32-bit machine a pointer size is 32 and on a 64-bit machine actually 48 + * bits are used for pointers. Ta-da, now we have double precision number, + * primitives, pointers all inside a 64 bit sequence and for numbers it doesn't + * require any bit mask operations, which means math on the var is now even + * faster. + * + * our custom 2 bits type tagging + * c00 : NULL + * c01 ... 0 : UNDEF (used in unused map keys) + * ... 1 : VOID (void function return void not null) + * ... 10 : FALSE + * ... 11 : TRUE + * c10 : INTEGER + * | + * '-- c is const bit. + * + */ + +#if VAR_NAN_TAGGING + +// Masks and payloads. 
#define _MASK_SIGN  ((uint64_t)0x8000000000000000)
#define _MASK_QNAN  ((uint64_t)0x7ffc000000000000)
#define _MASK_TYPE  ((uint64_t)0x0003000000000000)
#define _MASK_CONST ((uint64_t)0x0004000000000000)

// NOTE(review): _MASK_CONST (0x0004...) is one of the bits already set in
// _MASK_QNAN (0x7ffc...). As written IS_CONST() is true for every tagged
// (non double) value, ADD_CONST() does not change them and REMOVE_CONST()
// clears a bit the IS_NUM() test relies on. The bit layout needs a decision
// before the const macros are used.

#define _MASK_INTEGER (_MASK_QNAN | (uint64_t)0x0002000000000000)
#define _MASK_OBJECT  (_MASK_QNAN | (uint64_t)0x8000000000000000)

#define _PAYLOAD_INTEGER ((uint64_t)0x00000000ffffffff)
#define _PAYLOAD_OBJECT  ((uint64_t)0x0000ffffffffffff)

// Primitive types.
#define VAR_NULL      (_MASK_QNAN | (uint64_t)0x0000000000000000)
#define VAR_UNDEFINED (_MASK_QNAN | (uint64_t)0x0001000000000000)
#define VAR_VOID      (_MASK_QNAN | (uint64_t)0x0001000000000001)
#define VAR_FALSE     (_MASK_QNAN | (uint64_t)0x0001000000000002)
#define VAR_TRUE      (_MASK_QNAN | (uint64_t)0x0001000000000003)

// Encode types.
#define VAR_BOOL(value) ((value) ? VAR_TRUE : VAR_FALSE)
#define VAR_INT(value)  (_MASK_INTEGER | (uint32_t)(int32_t)(value))
#define VAR_NUM(value)  (doubleToVar(value))
#define VAR_OBJ(value)  ((Var)(_MASK_OBJECT | (uint64_t)(uintptr_t)(value)))

// Const casting.
#define ADD_CONST(value)    ((value) | _MASK_CONST)
#define REMOVE_CONST(value) ((value) & ~_MASK_CONST)

// Check types. Every use of the argument is parenthesized so that an
// expression argument such as `a | b` is tested as a whole.
#define IS_CONST(value) (((value) & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value)  ((value) == VAR_NULL)
#define IS_UNDEF(value) ((value) == VAR_UNDEFINED)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value)  ((value) == VAR_TRUE)
#define IS_BOOL(value)  (IS_TRUE(value) || IS_FALSE(value))
#define IS_INT(value)   (((value) & _MASK_INTEGER) == _MASK_INTEGER)
#define IS_NUM(value)   (((value) & _MASK_QNAN) != _MASK_QNAN)
#define IS_OBJ(value)   (((value) & _MASK_OBJECT) == _MASK_OBJECT)

// Decode types.
+#define AS_BOOL(value) ((value) == VAR_TRUE)
+#define AS_INT(value)  ((int32_t)((value) & _PAYLOAD_INTEGER))
+#define AS_NUM(value)  (varToDouble(value))
+#define AS_OBJ(value)  ((Object*)((value) & _PAYLOAD_OBJECT))
+
+#define AS_STRING(value)  ((String*)AS_OBJ(value))
+#define AS_CSTRING(value) (AS_STRING(value)->data)
+#define AS_ARRAY(value)   ((Array*)AS_OBJ(value))
+#define AS_MAP(value)     ((Map*)AS_OBJ(value))
+#define AS_RANGE(value)   ((Range*)AS_OBJ(value))
+
+typedef uint64_t Var;
+
+#else
+
+// TODO: Union tagging implementation of all the above macros (ignore the
+// macros that start with an underscore).
+
+
+typedef enum {
+  VAR_UNDEFINED, //< Internal type for exceptions.
+  VAR_NULL,      //< Null pointer type.
+  VAR_BOOL,      //< Yin and yang of software.
+  VAR_INT,       //< Only 32bit integers (for consistency with NaN-Tagging).
+  VAR_FLOAT,     //< Floats are stored as (64bit) double.
+
+  VAR_OBJECT,    //< Base type for all \ref var_Object types.
+} VarType;
+
+typedef struct {
+  VarType type;
+  union {
+    bool _bool;
+    int _int;
+    double _float;
+    Object* _obj;
+  };
+} Var; //< Same name as the NaN-tagging typedef; the prototypes below use Var.
+
+#endif // VAR_NAN_TAGGING
+
+typedef enum /* ObjectType */ {
+  OBJ_STRING,
+  OBJ_ARRAY,
+  OBJ_MAP,
+  OBJ_RANGE,
+
+  OBJ_SCRIPT,
+  OBJ_FUNC,
+  OBJ_INSTANCE,
+
+  OBJ_USER,
+} ObjectType;
+
+// Base struct for all heap allocated objects.
+struct Object {
+  ObjectType type;  //< Type of the object in \ref var_Object_Type.
+  //Class* is;      //< The class the object IS. // No OOP in MS.
+
+  Object* next;     //< Next object in the heap allocated linked list.
+};
+
+struct String {
+  Object _super;
+
+  uint32_t length;    //< Length of the string in \ref data.
+  uint32_t capacity;  //< Size of allocated \ref data.
+  char data[DYNAMIC_TAIL_ARRAY];
+};
+
+struct Array {
+  Object _super;
+
+  VarBuffer elements; //< Elements of the array.
+};
+
+// TODO: struct Map here.
+
+struct Range {
+  Object _super;
+
+  double from; //< Beginning of the range, inclusive.
+  double to;   //< End of the range, exclusive.
+};
+
+struct Script {
+  Object _super;
+
+  ID imports[MAX_IMPORT_SCRIPTS]; //< Imported script IDs.
+  int import_count;               //< Number of entries used in imports.
+
+  VarBuffer globals;              //< Script level global variables.
+  NameTable global_names;         //< Name map to index in globals.
+
+  FunctionBuffer functions;       //< Script level functions.
+  NameTable function_names;       //< Name map to index in functions.
+
+  // TODO: literal constants as Map.
+};
+
+// To maintain simplicity I won't implement object orientation in MiniScript.
+//struct Class {
+//  Object _super;
+//
+//  Class* _base_class;
+//  String* name;
+//};
+
+// Script function pointer.
+typedef struct {
+  ByteBuffer opcodes;  //< Buffer of opcodes.
+  IntBuffer oplines;   //< Line number of opcodes for debug (1 based).
+  int stack_size;      //< Maximum size of stack required.
+} Fn;
+
+struct Function {
+  Object _super;
+
+  const char* name;    //< Name in the script [owner].
+  Script* owner;       //< Owner script of the function.
+  int arity;           //< Number of arguments the function expects.
+
+  bool is_native;      //< True if Native function.
+  union {
+    MiniScriptNativeFn native; //< Native function pointer.
+    Fn* fn;                    //< Script function pointer.
+  };
+};
+
+// Methods.
+
+void varInitObject(Object* self, VM* vm, ObjectType type);
+
+// Instead use VAR_NUM(value) and AS_NUM(value)
+Var doubleToVar(double value);
+double varToDouble(Var value);
+
+// Allocate new String object and return String*.
+String* newString(VM* vm, const char* text, uint32_t length);
+
+// Allocate new Script object and return Script*.
+Script* newScript(VM* vm);
+
+// Allocate new Function object and return Function*. Parameter [name] should
+// be the name in the Script's nametable.
+Function* newFunction(VM* vm, const char* name, Script* owner, bool is_native);
+
+
+#endif // VAR_H
diff --git a/src/vm.c b/src/vm.c
new file mode 100644
index 0000000..7660fea
--- /dev/null
+++ b/src/vm.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Thakee Nathees
+ * Licensed under: MIT License
+ */
+
+#include "vm.h"
+
+void* vmRealloc(VM* self, void* memory, size_t old_size, size_t new_size) {
+
+  // Track the total allocated memory of the VM to trigger the GC.
+  self->bytes_allocated += new_size - old_size;
+
+  // TODO: If vm->bytes_allocated > some_value -> GC();
+
+  if (new_size == 0) { // A zero [new_size] is a free request.
+    free(memory);
+    return NULL;
+  }
+
+  return realloc(memory, new_size);
+}
+
+void vmPushTempRef(VM* self, Object* obj) {
+  ASSERT(obj != NULL, "Cannot reference to NULL.");
+  ASSERT(self->temp_reference_count < MAX_TEMP_REFERENCE,
+         "Too many temp references");
+  self->temp_reference[self->temp_reference_count++] = obj;
+}
+
+void vmPopTempRef(VM* self) {
+  ASSERT(self->temp_reference_count > 0, "Temporary reference is empty to pop.");
+  self->temp_reference_count--;
+}
+
diff --git a/src/vm.h b/src/vm.h
new file mode 100644
index 0000000..76722b5
--- /dev/null
+++ b/src/vm.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Thakee Nathees
+ * Licensed under: MIT License
+ */
+
+#ifndef VM_H
+#define VM_H
+
+#include "common.h"
+#include "compiler.h"
+#include "var.h"
+
+// The maximum number of temporary object references that can be protected
+// from being garbage collected at once.
+#define MAX_TEMP_REFERENCE 8
+
+struct VM {
+
+  // The first object in the linked list of all heap allocated objects.
+  Object* first;
+
+  size_t bytes_allocated;
+
+  // A stack of temporary object references to ensure that the objects
+  // aren't garbage collected.
+  Object* temp_reference[MAX_TEMP_REFERENCE];
+  int temp_reference_count;
+
+  // Current compiler reference to mark its heap allocated objects.
+  Compiler* compiler;
+};
+
+// A realloc wrapper which handles memory allocations of the VM.
+// - To allocate new memory pass NULL to parameter [memory] and 0 to
+//   parameter [old_size]; on failure it'll return NULL.
+// - To free an already allocated memory pass 0 to parameter [new_size]
+//   and it'll return NULL.
+// - The [old_size] parameter is required to keep track of the VM's
+//   allocations to trigger the garbage collections.
+void* vmRealloc(VM* self, void* memory, size_t old_size, size_t new_size);
+
+// Push the object to temporary references stack.
+void vmPushTempRef(VM* self, Object* obj);
+
+// Pop the top most object from temporary reference stack.
+void vmPopTempRef(VM* self);
+
+#endif // VM_H
\ No newline at end of file
diff --git a/test/clogger.h b/test/clogger.h
new file mode 100644
index 0000000..ef28827
--- /dev/null
+++ b/test/clogger.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020-2021 Thakee Nathees
+ * Licensed under: MIT License
+ */
+
+#ifndef clogger_H
+#define clogger_H
+
+/** @file
+ * Single header console color logger library
+ *
+ * USAGE:
+ *     // define the implementation in only a single *.c source file like this
+ *     #define CLOGGER_IMPLEMENT
+ *     #include "clogger.h"
+ *
+ * You should call `clogger_init();` before any of your logging calls.
+ * You can define your own palette with `clogger_ColorPalette` and apply it
+ * with the `clogger_setColorPalette(your_palette)` function. There is a list
+ * of public API functions declared below. `clogger_iColor` is just an 8 bit
+ * unsigned integer whose high 4 bits represent the background and low 4 bits
+ * the foreground: COL_FG | (COL_BG << 4). You can define your own
+ * palette. For examples see the implementation in `test.c`.
+*/ + +#include +#include +#include +#include +#include + +/** supported max 16 different colors to maintain compatibility in windows */ +#define PALLETE_MAX_SIZE 16 + +#define CLOGGER_PROGRESS_BAR 30 + + /** color logger public API */ +typedef struct clogger_Color clogger_Color; +typedef struct clogger_ColorPalette clogger_ColorPalette; +typedef uint8_t clogger_iColor; + +clogger_Color clogger_ColorRGB(uint8_t r, uint8_t g, uint8_t b); +clogger_ColorPalette clogger_newPallete(); + +void clogger_setColorPalette(clogger_ColorPalette pallate); +void clogger_init(); + +void clogger_logf(clogger_iColor color, bool _stderr, const char* fmt, ...); +void clogger_logfVA(const char* fmt, va_list args, bool _stderr, + clogger_iColor color); +void clogger_log(const char* msg, clogger_iColor color, bool _stderr); + +void clogger_logfSuccess(const char* fmt, ...); +void clogger_logfWarning(const char* fmt, ...); +void clogger_logfError(const char* fmt, ...); + +void clogger_progress(const char* msg, int done, int total); + +/** Define our own platform macro */ +#ifndef _PLATFORM_DEFINED_ + #define _PLATFORM_DEFINED_ + #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) + #define PLATFORM_WINDOWS + #elif defined(__APPLE__) || defined(__MACH__) + #include + #if TARGET_IPHONE_SIMULATOR + #define PLATFORM_IOS_SIMULATOR + #elif TARGET_OS_IPHONE + #define PLATFORM_IOS + #elif TARGET_OS_MAC + #define PLATFORM_APPLE + #else + #error "Unknown Apple platform" + #endif + #elif defined(__linux__) + #define PLATFORM_LINUX + #elif defined(__unix__) + #define PLATFORM_UNIX + #elif defined(_POSIX_VERSION) + #define PLATFORM_POSIX + #else + #error "Unknown Platform" + #endif +#endif // _PLATFORM_DEFINED_ + +/** The buffer size for vnsprintf(...) 
*/ +#ifndef VSNPRINTF_BUFF_SIZE +#define VSNPRINTF_BUFF_SIZE 8192 +#endif + +/** The platform independant color values */ +struct clogger_Color { + uint8_t r, g, b; +}; + +typedef enum clogger_Colors clogger_Colors; +enum clogger_Colors { + CLOGGER_COL_BLACK = 0, + CLOGGER_COL_WHITE = 7, + CLOGGER_COL_GREEN = 2, + CLOGGER_COL_YELLOW = 14, + CLOGGER_COL_RED = 12, + + CLOGGER_COL_CUSTOM_1 = 1, + CLOGGER_COL_CUSTOM_2 = 3, + CLOGGER_COL_CUSTOM_3 = 4, + CLOGGER_COL_CUSTOM_4 = 5, + CLOGGER_COL_CUSTOM_5 = 6, + CLOGGER_COL_CUSTOM_6 = 8, + CLOGGER_COL_CUSTOM_7 = 9, + CLOGGER_COL_CUSTOM_8 = 10, + CLOGGER_COL_CUSTOM_9 = 11, + CLOGGER_COL_CUSTOM_10 = 13, + CLOGGER_COL_CUSTOM_11 = 15, +}; + +struct clogger_ColorPalette { + clogger_Color colors[PALLETE_MAX_SIZE]; +}; + +#endif // clogger_H + +#ifdef CLOGGER_IMPLEMENT + +/** The default color palette of cprint (global) */ +clogger_ColorPalette* g_clogger_color_pallete; + +void clogger_init() { + if (g_clogger_color_pallete == NULL) { + clogger_setColorPalette(clogger_newPallete()); + } +} + +clogger_Color clogger_ColorRGB(uint8_t r, uint8_t g, uint8_t b) { + clogger_Color ret = {r, g, b}; + return ret; +} + +void clogger_logf(clogger_iColor color, bool _stderr, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + clogger_logfVA(fmt, args, _stderr, color); + va_end(args); +} + +void clogger_logfSuccess(const char* p_fmt, ...) { + va_list args; + va_start(args, p_fmt); + clogger_logfVA(p_fmt, args, false, CLOGGER_COL_GREEN); + va_end(args); +} + +void clogger_logfWarning(const char* p_fmt, ...) { + va_list args; + va_start(args, p_fmt); + clogger_logfVA(p_fmt, args, true, CLOGGER_COL_YELLOW); + va_end(args); +} + +void clogger_logfError(const char* p_fmt, ...) 
{
+  va_list args;
+  va_start(args, p_fmt);
+  clogger_logfVA(p_fmt, args, true, CLOGGER_COL_RED);
+  va_end(args);
+}
+
+void clogger_logfVA(const char* fmt, va_list args, bool _stderr, clogger_iColor color) {
+
+  char buf[VSNPRINTF_BUFF_SIZE + 1]; // +1 for the terminating character
+  int len = vsnprintf(buf, VSNPRINTF_BUFF_SIZE, fmt, args);
+
+  if (len <= 0) return;
+  // Output is too big, will be truncated
+  if ((unsigned int)len >= VSNPRINTF_BUFF_SIZE) len = VSNPRINTF_BUFF_SIZE;
+  buf[len] = 0;
+  clogger_log((const char*)buf, color, _stderr);
+}
+
+/** for other terminal emulators which support ANSI (git bash, msys, putty, ...) */
+void cclogger_logANSI(const char* message, clogger_iColor color, bool _stderr) {
+  // \033[38;2;R;G;Bm msg \033[0;00m
+  assert(g_clogger_color_pallete != NULL && "did you forgot to call clogger_init()");
+
+  clogger_Color col = g_clogger_color_pallete->colors[color & 0x0f]; // low 4 bits = foreground index
+
+  char buff_color[23]; int ptr = 0;
+  ptr += sprintf(buff_color + ptr, "%s", "\033[38;2;");
+  ptr += sprintf(buff_color + ptr, "%i", col.r);
+  ptr += sprintf(buff_color + ptr, "%s", ";");
+  ptr += sprintf(buff_color + ptr, "%i", col.g);
+  ptr += sprintf(buff_color + ptr, "%s", ";");
+  ptr += sprintf(buff_color + ptr, "%i", col.b);
+  ptr += sprintf(buff_color + ptr, "%s", "m");
+  buff_color[22] = '\0';
+  fprintf((_stderr) ? stderr : stdout, "%s%s%s", buff_color, message, "\033[0;00m");
+}
+
+void clogger_progress(const char* msg, int done, int total) {
+  float precentage = (total > 0) ? (float)done / (float)total : 0.0f; // avoid dividing by zero
+  clogger_logf(CLOGGER_COL_WHITE, false, "\r%s [", msg);
+  int i = 0;
+  for (; i < precentage * CLOGGER_PROGRESS_BAR; i++)
+    clogger_log("#", CLOGGER_COL_GREEN, false);
+  for (; i < CLOGGER_PROGRESS_BAR; i++)
+    clogger_log(" ", CLOGGER_COL_WHITE, false);
+  clogger_logf(CLOGGER_COL_WHITE, false, "] %i%%", (int)(precentage * 100));
+  fflush(stdout);
+
+}
+
+/** PLATFORM DEPENDENT CODE **************************************************/
+
+#ifdef PLATFORM_WINDOWS
+
+#ifndef NOMINMAX /**< mingw already has defined for us. */
+#define NOMINMAX
+#endif
+
+#include <io.h> /**< for isatty() */
+#include <windows.h>
+#undef ERROR /**< This will pollute symbol `ERROR` later */
+
+#include /* NOTE(review): header name is missing in this copy of the patch; restore it from upstream. */
+
+clogger_ColorPalette clogger_newPallete() {
+  clogger_ColorPalette pallete; // NOTE(review): stays uninitialized when stdout is not a console (and entry 0 always).
+#ifndef __TINYC__
+  if (_isatty(_fileno(stdout))) {
+    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
+    CONSOLE_SCREEN_BUFFER_INFOEX info;
+    info.cbSize = sizeof(CONSOLE_SCREEN_BUFFER_INFOEX);
+
+    GetConsoleScreenBufferInfoEx(hConsole, &info);
+    for (int i = 1; i < PALLETE_MAX_SIZE; i++) {
+      COLORREF color = info.ColorTable[i];
+      pallete.colors[i].r = GetRValue(color);
+      pallete.colors[i].g = GetGValue(color);
+      pallete.colors[i].b = GetBValue(color);
+    }
+  }
+#endif
+
+  return pallete;
+}
+
+void clogger_setColorPalette(clogger_ColorPalette pallete) {
+  static clogger_ColorPalette s_pallete;
+  s_pallete = pallete;
+  g_clogger_color_pallete = &s_pallete;
+#ifndef __TINYC__
+  if (_isatty(_fileno(stdout))) {
+    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
+    CONSOLE_SCREEN_BUFFER_INFOEX info;
+    info.cbSize = sizeof(CONSOLE_SCREEN_BUFFER_INFOEX);
+
+    GetConsoleScreenBufferInfoEx(hConsole, &info);
+    for (int i = 1; i < PALLETE_MAX_SIZE; i++) {
+      uint8_t r = g_clogger_color_pallete->colors[i].r;
+      uint8_t g = g_clogger_color_pallete->colors[i].g;
+      uint8_t b = g_clogger_color_pallete->colors[i].b;
+      info.ColorTable[i] = RGB(r, g, b);
+    }
+    SetConsoleScreenBufferInfoEx(hConsole, &info);
+  }
+#endif
+  // else we use ANSI color codes
+}
+
+static void _win32_setConsoleColor(clogger_iColor color, bool _stderr) {
+  assert(g_clogger_color_pallete != NULL && "did you forgot to call clogger_init()");
+  SetConsoleTextAttribute(GetStdHandle(_stderr ? STD_ERROR_HANDLE : STD_OUTPUT_HANDLE), color);
+}
+
+void clogger_log(const char* message, clogger_iColor color, bool _stderr) {
+  FILE* out = (_stderr) ? stderr : stdout;
+  if (_isatty(_fileno(out))) {
+    _win32_setConsoleColor(color, _stderr); // color the same stream we write to
+    fprintf(out, "%s", message);
+    _win32_setConsoleColor(CLOGGER_COL_WHITE, _stderr);
+  } else {
+    cclogger_logANSI(message, color, _stderr);
+    fflush(out);
+  }
+}
+
+#elif defined(PLATFORM_LINUX)
+
+#error "TODO:"
+
+#else
+
+#error "TODO:"
+
+#endif // PLATFORM_WINDOWS
+
+#endif // CLOGGER_IMPLEMENT
\ No newline at end of file
diff --git a/test/main.c b/test/main.c
new file mode 100644
index 0000000..ada5d0d
--- /dev/null
+++ b/test/main.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Thakee Nathees
+ * Licensed under: MIT License
+ */
+
+#include <stdio.h>
+
+#define CLOGGER_IMPLEMENT
+#include "clogger.h"
+
+// FIXME:
+#include "../src/common.h"
+#include "../src/var.h"
+#include "../src/vm.h"
+
+#include "../src/types/gen/string_buffer.h"
+#include "../src/types/gen/byte_buffer.h"
+
+int main() {
+  clogger_init();
+  //clogger_logfError("[DummyError] dummy error\n");
+  //clogger_logfWarning("[DummyWarning] dummy warning\n");
+
+  FILE* fp = fopen("test.ms", "r");
+  if (fp != NULL) {
+    char buff[1024];
+    size_t read = fread(buff, 1, sizeof(buff) - 1, fp); // keep one byte for the terminator
+    buff[read] = '\0';
+    printf("%s\n", buff);
+    fclose(fp);
+  } else {
+    clogger_logfError("[Error] cannot open file test.ms\n");
+  }
+
+  VM* vm = (VM*)calloc(1, sizeof(VM)); // zero-initialized VM
+  if (vm == NULL) return 1;
+
+  ByteBuffer buff;
+  byteBufferInit(&buff);
+
+  byteBufferWrite(&buff, vm, 'a');
+  byteBufferWrite(&buff, vm, 'b');
+  byteBufferWrite(&buff, vm, 'c');
+
+  String* str = newString(vm, (const char*)buff.data, 3);
+  Var vstr = VAR_OBJ(&str->_super);
+  if (strcmp(AS_CSTRING(vstr), "abc") != 0) {
+    clogger_logfError("[Error] something went wrong.\n");
+  }
+
+  compileSource(vm, "native someNativeFn(a, b, c);\n");
+
+  return 0;
+}