mirror of
https://github.com/zekexiao/pocketlang.git
synced 2025-02-05 20:26:53 +08:00
initial commit
This commit is contained in:
commit
c77801daf3
55
.gitignore
vendored
Normal file
55
.gitignore
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
# MiniScript ignore list
|
||||
build/
|
||||
MiniScript/test/
|
||||
src/test/
|
||||
MiniScript/.import/
|
||||
release/
|
||||
debug/
|
||||
*__pycache__/
|
||||
.vs/
|
||||
.vscode/
|
||||
|
||||
config.json
|
||||
run.bat
|
||||
*.dblite
|
||||
*.idb
|
||||
*.pdb
|
||||
*.sln
|
||||
*.vcxproj
|
||||
*.vcxproj.filters
|
||||
*.vcxproj.user
|
||||
*.obj.enc
|
||||
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Compiled Object files
|
||||
*.slo
|
||||
*.lo
|
||||
*.o
|
||||
*.obj
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
|
||||
# Fortran module files
|
||||
*.mod
|
||||
*.smod
|
||||
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.a
|
||||
*.lib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Thakee Nathees
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
36
SConscript
Normal file
36
SConscript
Normal file
@ -0,0 +1,36 @@
|
||||
## Build description for the MiniScript static library and its test program.
## `env` is the construction environment exported by the SConstruct.
Import('env')
import os

## Project name and the path of the produced executable are stored as plain
## attributes on the environment object.
env.PROJECT_NAME = "MiniScript"
env.RUN_TARGET = os.path.join(env['variant_dir'], 'bin/miniscript')

## MiniScript source files
SOURCES = [
    Glob('src/*.c'),
    Glob('src/types/*.c'),
    Glob('src/types/gen/*.c'),
]

## Header search paths shared by the library and the test program.
CPPPATH = [
    'include/',
]

## Compile miniscript lib.
vm = env.Library(
    target = 'bin/miniscript',
    source = SOURCES,
    CPPPATH = CPPPATH,
)

## Test executable
## NOTE(review): uses the same target name as the library above; presumably
## the platform-specific prefix/suffix keeps the two outputs apart -- confirm.
test = env.Program(
    target = 'bin/miniscript',
    source = ['test/main.c'],
    CPPPATH = CPPPATH,
    LIBPATH = 'bin',
    LIBS = 'miniscript',
)

## Make sure the library is built before the test program.
Requires(test, vm)
|
||||
|
||||
|
211
SConstruct
Normal file
211
SConstruct
Normal file
@ -0,0 +1,211 @@
|
||||
#!python
|
||||
import os, subprocess, sys
|
||||
|
||||
def get_variant_dir(env):
    """Return the build output directory for the configuration held in *env*.

    The path is ``build/<platform>/<target>``; on Windows the ``bits`` value
    is appended as one more component. Forward slashes are always used.
    """
    components = ['build', env['platform'], env['target']]
    if env['platform'] == 'windows':
        components.append(env['bits'])
    return '/'.join(components)
|
||||
|
||||
## Command line variables (`name=value` arguments given to scons).
opts = Variables([], ARGUMENTS)
## Define our options
## An empty `platform` means "detect it from sys.platform" (done further below).
opts.Add(EnumVariable('platform', "Compilation platform", '', ['', 'windows', 'x11', 'linux', 'osx']))
opts.Add(EnumVariable('target', "Compilation target", 'debug', ['debug', 'release']))
opts.Add(EnumVariable('bits', 'output program bits', '64', ['32', '64']))
opts.Add(BoolVariable('use_llvm', "Use the LLVM / Clang compiler", False))
opts.Add(BoolVariable('use_mingw', "Use Mingw compiler", False))

opts.Add(BoolVariable('vsproj', "make a visual studio project", False))
opts.Add(BoolVariable('verbose', "use verbose build command", False))

## NOTE(review): the option is named `libs` but its help text talks about
## unit tests, and nothing in this file reads it -- confirm the intent.
opts.Add(BoolVariable('libs', "include unit tests in main", False))

## Setup the Environment
DefaultEnvironment(tools=[]) ## not using any tools
env = Environment()

## Updates the environment with the option variables.
opts.Update(env)

## Compiler selection: clang takes priority over mingw when both are set.
if env['use_llvm']:
    env['CC'] = 'clang'
    env['CXX'] = 'clang++'
elif env['use_mingw']:
    ## NOTE(review): this only stores a construction variable named `tools`;
    ## presumably the intent was to load the mingw tool -- confirm.
    env['tools'] = ['mingw']
|
||||
|
||||
## find platform
|
||||
## No platform given on the command line: derive it from the host Python.
if env['platform'] == '':
    if sys.platform == 'win32':
        env['platform'] = 'windows'
    elif sys.platform in ('x11', 'linux', 'linux2'):
        env['platform'] = 'linux'
    elif sys.platform == 'darwin':
        env['platform'] = 'osx'
    else:
        print("platform(%s) not supported." % sys.platform)
        ## Abort with a failing status so callers (CI, IDE build commands)
        ## see the error; the `quit()` helper exits with status 0 and is only
        ## installed by the `site` module.
        Exit(1)
|
||||
|
||||
## For the reference:
|
||||
## - CCFLAGS are compilation flags shared between C and C++
|
||||
## - CFLAGS are for C-specific compilation flags
|
||||
## - CXXFLAGS are for C++-specific compilation flags
|
||||
## - CPPFLAGS are for pre-processor flags
|
||||
## - CPPDEFINES are for pre-processor defines
|
||||
## - LINKFLAGS are for linking flags
|
||||
|
||||
## Check our platform specifics
|
||||
## Per-platform compiler and linker flags.
if env['platform'] == "osx":
    env.Append(CXXFLAGS=['-std=c++17'])
    if env['target'] == 'debug':
        env.Append(CCFLAGS=['-g', '-O2', '-arch', 'x86_64'])
        env.Append(LINKFLAGS=['-arch', 'x86_64'])
    else:
        env.Append(CCFLAGS=['-g', '-O3', '-arch', 'x86_64'])
        env.Append(LINKFLAGS=['-arch', 'x86_64'])

## 'linux' is the value assigned by the platform auto-detection and is also an
## accepted value of the `platform` option, so it must receive the same flags
## as 'x11'; previously only 'x11' matched and Linux builds got no flags.
elif env['platform'] in ('x11', 'linux'):
    env.Append(LIBS=['dl', 'pthread'])
    env.Append(CXXFLAGS=['-std=c++17'])
    if env['target'] == 'debug':
        env.Append(CCFLAGS=['-fPIC', '-g3', '-Og'])
    else:
        env.Append(CCFLAGS=['-fPIC', '-g', '-O3'])

elif env['platform'] == "windows":
    env.Append(CXXFLAGS=['/std:c++17', '/bigobj'])
    env.Append(CPPDEFINES=['_CRT_SECURE_NO_WARNINGS'])
    env.Append(CPPDEFINES=['WIN32', '_WIN32', '_WINDOWS'])
    env.Append(CCFLAGS=['-W3', '-GR', '/FS'])
    env.Append(LINKFLAGS='-SUBSYSTEM:CONSOLE')
    env.Append(LIBS=[])

    ## Select the target architecture from the `bits` option.
    if env['bits'] == '32':
        env['TARGET_ARCH'] = 'x86'
    else:
        env['TARGET_ARCH'] = 'x86_64'

    if env['target'] == 'debug':
        ## DEBUG enables the ASSERT/UNREACHABLE diagnostics in src/common.h.
        env.Append(CPPDEFINES=['DEBUG'])
        env.Append(CCFLAGS=['-EHsc', '-MDd', '-ZI'])
        env.Append(LINKFLAGS=['-DEBUG'])
    else:
        env.Append(CPPDEFINES=['NDEBUG'])
        env.Append(CCFLAGS=['-O2', '-EHsc', '-MD'])
|
||||
|
||||
## --------------------------------------------------------------------------------
|
||||
|
||||
## no_verbose function is from : https://github.com/godotengine/godot/blob/master/methods.py
|
||||
def no_verbose(sys, env):
    """Replace the echoed build command lines with short, colored messages.

    *sys* is the ``sys`` module (only ``sys.stdout.isatty()`` is used) and
    *env* is the construction environment whose ``*COMSTR`` variables are
    appended to. Color codes are emitted only when stdout is a TTY, so output
    redirected to a file or a pipe stays free of escape sequences.
    """
    colored = sys.stdout.isatty()

    def shade(code):
        return code if colored else ""

    blue = shade("\033[94m")
    purple = shade("\033[95m")
    yellow = shade("\033[93m")
    red = shade("\033[91m")
    end = shade("\033[0m")

    def banner(lead, label, placeholder):
        # <lead>label <purple>==> <yellow>placeholder<end>
        return "{}{} {}==> {}{}{}".format(
            lead, label, purple, yellow, placeholder, end
        )

    compile_source = banner(blue, "Compiling", "$SOURCE")
    compile_shared = banner(blue, "Compiling shared", "$SOURCE")

    # (construction variable, message) in the order they are registered.
    registrations = (
        ("CXXCOMSTR", compile_source),
        ("CCCOMSTR", compile_source),
        ("SHCCCOMSTR", compile_shared),
        ("SHCXXCOMSTR", compile_shared),
        ("ARCOMSTR", banner(red, "Linking Static Library", "$TARGET")),
        ("RANLIBCOMSTR", banner(red, "Ranlib Library", "$TARGET")),
        ("SHLINKCOMSTR", banner(red, "Linking Shared Library", "$TARGET")),
        ("LINKCOMSTR", banner(red, "Linking Program", "$TARGET")),
        ("JARCOMSTR", banner(red, "Creating Java Archive", "$TARGET")),
        ("JAVACCOMSTR", compile_source),
    )
    for variable, text in registrations:
        env.Append(**{variable: [text]})
|
||||
|
||||
## Unless verbose output was requested, swap the echoed command lines for the
## short colored messages.
if not env['verbose']:
    no_verbose(sys, env)

## Make `env` importable from the SConscript, then build out-of-source in the
## variant directory. `duplicate=0` is passed so sources are not copied there.
Export('env')
env['variant_dir'] = get_variant_dir(env)
SConscript('SConscript', variant_dir=env['variant_dir'], duplicate=0)
|
||||
|
||||
## --------------------------------------------------------------------------------
|
||||
|
||||
## visual studio targets
|
||||
def get_vsproj_context():
    """Return ``(variants, targets)`` for the Visual Studio project.

    ``variants`` is ["debug|Win32", "debug|x64", "release|Win32",
    "release|x64"]; ``targets`` holds ``env.RUN_TARGET`` once per variant.
    """
    arch_of_bits = {'32': 'Win32', '64': 'x64'}
    variants = [
        target + '|' + arch_of_bits[bits]
        for target in ('debug', 'release')
        for bits in ('32', '64')
    ]
    targets = [env.RUN_TARGET for _ in variants]
    return variants, targets
|
||||
|
||||
|
||||
def recursive_collect(dir, suffix):
    """Walk *dir* recursively and collect files ending with any of *suffix*.

    *suffix* is an iterable of endings; a file is listed once per ending it
    matches. Every hit is returned as ``'$(ProjectDir)'`` followed by the
    file path relative to the current working directory.
    """
    found = []
    for entry in os.listdir(dir):
        path = os.path.join(dir, entry)
        if os.path.isfile(path):
            found.extend(
                '$(ProjectDir)' + os.path.relpath(path)
                for ending in suffix
                if path.endswith(ending)
            )
        elif os.path.isdir(path):
            found.extend(recursive_collect(path, suffix))
    return found
|
||||
|
||||
def msvs_collect_header():
    """Collect every C/C++ header below the current directory."""
    header_endings = ('.h', '.hpp')
    return recursive_collect('.', header_endings)
|
||||
|
||||
def msvc_collect_sources():
    """Collect every C/C++ source file below the current directory."""
    source_endings = ('.c', '.cpp', '.cc', '.cxx')
    return recursive_collect('.', source_endings)
|
||||
|
||||
def msvc_build_commandline(commands):
    """Wrap *commands* in the cmd.exe preamble used by the VS project.

    The preamble sets ``bits`` to 64 and overrides it with 32 when the
    project's ``$(PlatformTarget)`` is x86; the steps are chained with an
    escaped ``&`` so they run as one command line.
    """
    steps = (
        'cmd /V /C set "bits=64"',
        '(if "$(PlatformTarget)"=="x86" (set "bits=32"))',
        commands,
    )
    return " ^& ".join(steps)
|
||||
|
||||
|
||||
## Generate a Visual Studio project whose build / rebuild / clean actions call
## back into scons. `!bits!` is expanded by cmd.exe at run time using the
## `bits` variable set by msvc_build_commandline().
if env['vsproj']:
    env["MSVSBUILDCOM"] = msvc_build_commandline(
        "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows target=$(Configuration) bits=!bits!"
    )
    ## Rebuild additionally passes vsproj=yes.
    env["MSVSREBUILDCOM"] = msvc_build_commandline(
        "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows target=$(Configuration) bits=!bits! vsproj=yes"
    )
    env["MSVSCLEANCOM"] = msvc_build_commandline(
        "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" --clean platform=windows bits=!bits! target=$(Configuration)"
    )
    variants, targets = get_vsproj_context()
    env.MSVSProject(target = env.PROJECT_NAME + env['MSVSPROJECTSUFFIX'],
        srcs = msvc_collect_sources(),
        incs = msvs_collect_header(),
        variant = variants,
        runfile = targets,
        buildtarget = targets,
    )


## Generates help for the -h scons option.
Help(opts.GenerateHelpText(env))
|
30
include/miniscript.h
Normal file
30
include/miniscript.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef MINISCRIPT_H
#define MINISCRIPT_H

#include <stdint.h>
#include <stdbool.h>

// The version number macros.
#define MS_VERSION_MAJOR 0
#define MS_VERSION_MINOR 1
#define MS_VERSION_PATCH 0

// String representation of the version. Keep it in sync with the three
// numeric macros above when bumping the version.
#define MS_VERSION_STRING "0.1.0"

// MiniScript Virtual Machine.
// It'll contain the state of the execution, stack, heap, and manage memory
// allocations. Only declared here; the definition is not part of this header.
typedef struct VM VM;

// C function pointer which is callable from MiniScript.
typedef void (*MiniScriptNativeFn)(VM* vm);



#endif // MINISCRIPT_H
|
107
src/common.h
Normal file
107
src/common.h
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef MS_COMMON_H
|
||||
#define MS_COMMON_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// miniscript visibility macros. Define MS_DLL when miniscript is used as a
// shared library, and additionally define MS_COMPILE while building the
// library itself so that MS_PUBLIC exports the symbols instead of importing
// them. Without MS_DLL, MS_PUBLIC expands to nothing.

// Compiler specific export / import attributes. Every branch must define
// both _MS_EXPORT and _MS_IMPORT because MS_PUBLIC below refers to them.
#ifdef _MSC_VER
  #define _MS_EXPORT __declspec(dllexport)
  #define _MS_IMPORT __declspec(dllimport)
#elif defined(__GNUC__)
  #define _MS_EXPORT __attribute__((visibility ("default")))
  #define _MS_IMPORT
#else
  #define _MS_EXPORT
  #define _MS_IMPORT
#endif

#ifdef MS_DLL
  #ifdef MS_COMPILE
    #define MS_PUBLIC _MS_EXPORT
  #else
    #define MS_PUBLIC _MS_IMPORT
  #endif
#else
  #define MS_PUBLIC
#endif
|
||||
|
||||
// Unsigned 32 bit number used as a unique identifier in various places.
typedef uint32_t ID;

// Nan-Tagging could be disabled for debugging/portability purposes.
// To disable define `VAR_NAN_TAGGING 0`, otherwise it defaults to Nan-Tagging.
#ifndef VAR_NAN_TAGGING
  #define VAR_NAN_TAGGING 1
#endif

// With Nan-Tagging a Var is a plain 64 bit integer, otherwise it is a struct
// which is only forward declared here.
#if VAR_NAN_TAGGING
  typedef uint64_t Var;
#else
  typedef struct Var Var;
#endif

// Forward declarations of the object types.
typedef struct Object Object;
typedef struct String String;
typedef struct Array Array;
typedef struct Range Range;

typedef struct Script Script;
//typedef struct Class Class;
typedef struct Function Function;
|
||||
|
||||
#ifdef DEBUG

#include <stdio.h>

// Debug build: if [condition] is false, print [message] with the function,
// file and line of the failing check to stderr and abort.
#define ASSERT(condition, message)                                   \
  do {                                                               \
    if (!(condition)) {                                              \
      fprintf(stderr, "Assertion failed: %s\n\tat %s() (%s:%i)\n",   \
        message, __func__, __FILE__, __LINE__);                      \
      abort();                                                       \
    }                                                                \
  } while (false)

// Debug build: report the location and abort. The arguments are passed in
// the order the format expects them ("%s() (%s:%i)" -> function, file,
// line); handing __LINE__ to a %s conversion is undefined behavior.
#define UNREACHABLE()                                                \
  do {                                                               \
    fprintf(stderr, "Execution reached an unreachable path\n"        \
      "\tat %s() (%s:%i)\n", __func__, __FILE__, __LINE__);          \
    abort();                                                         \
  } while (false)

#else

// Release build: the check expands to nothing, [condition] and [message]
// are not evaluated.
#define ASSERT(condition, message) do { } while (false)

// Release build: tell the optimizer the path cannot be taken, where the
// compiler offers a builtin for it.
// Reference : https://github.com/wren-lang/
#if defined( _MSC_VER )
  #define UNREACHABLE() __assume(0)
#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
  #define UNREACHABLE() __builtin_unreachable()
#else
  #define UNREACHABLE()
#endif

#endif // DEBUG
|
||||
|
||||
// Allocate object of [type] using the vmRealloc function. Expands to a
// vmRealloc call with a NULL pointer, 0 and sizeof(type), cast to [type]*.
// NOTE(review): presumably the arguments are (old pointer, old size, new
// size) -- confirm against vmRealloc's declaration.
#define ALLOCATE(vm, type) \
    ((type*)vmRealloc(vm, NULL, 0, sizeof(type)))

// Allocate object of [type] which has a dynamic tail array of type [tail_type]
// with [count] entries. The requested size is sizeof(type) plus [count]
// times sizeof(tail_type).
#define ALLOCATE_DYNAMIC(vm, type, count, tail_type) \
    ((type*)vmRealloc(vm, NULL, 0, sizeof(type) + sizeof(tail_type) * (count)))
|
||||
|
||||
#endif //MS_COMMON_H
|
945
src/compiler.c
Normal file
945
src/compiler.c
Normal file
@ -0,0 +1,945 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#include "types/name_table.h"
|
||||
#include "types/gen/byte_buffer.h"
|
||||
#include "utils.h"
|
||||
#include "vm.h"
|
||||
|
||||
// The maximum number of variables (or globals if compiling the top level
// script) to look up from the compiling context. It's also limited by the
// opcode, which uses a single byte value to identify the local.
#define MAX_VARIABLES 256
|
||||
|
||||
// Every kind of token the lexer can hand to the parser. TK_ERROR is 0, so a
// zero initialized token type reads as an error token.
typedef enum {

  TK_ERROR = 0,
  TK_EOF,
  TK_LINE,

  // symbols
  TK_DOT,        // .
  TK_DOTDOT,     // ..
  TK_COMMA,      // ,
  TK_COLLON,     // :
  TK_SEMICOLLON, // ;
  TK_HASH,       // #
  TK_LPARAN,     // (
  TK_RPARAN,     // )
  TK_LBRACKET,   // [
  TK_RBRACKET,   // ]
  TK_LBRACE,     // {
  TK_RBRACE,     // }
  TK_PERCENT,    // %

  TK_TILD,       // ~
  TK_AMP,        // &
  TK_PIPE,       // |
  TK_CARET,      // ^

  TK_PLUS,       // +
  TK_MINUS,      // -
  TK_STAR,       // *
  TK_FSLASH,     // /
  TK_BSLASH,     // \.
  TK_EQ,         // =
  TK_GT,         // >
  TK_LT,         // <
  //TK_BANG,     // ! parsed as TK_NOT

  TK_EQEQ,       // ==
  TK_NOTEQ,      // !=
  TK_GTEQ,       // >=
  TK_LTEQ,       // <=

  TK_PLUSEQ,     // +=
  TK_MINUSEQ,    // -=
  TK_STAREQ,     // *=
  TK_DIVEQ,      // /=
  TK_SRIGHT,     // >>
  TK_SLEFT,      // <<

  //TODO:
  // >>= <<=
  //TK_PLUSPLUS,   // ++
  //TK_MINUSMINUS, // --
  //TK_MODEQ,      // %=
  //TK_XOREQ,      // ^=

  // Keywords.
  //TK_TYPE,     // type
  TK_IMPORT,     // import
  TK_ENUM,       // enum
  TK_DEF,        // def
  TK_NATIVE,     // native (C function declaration)
  TK_END,        // end

  TK_NULL,       // null
  TK_SELF,       // self
  TK_IS,         // is
  TK_IN,         // in
  TK_AND,        // and
  TK_OR,         // or
  TK_NOT,        // not
  TK_TRUE,       // true
  TK_FALSE,      // false

  // Type names for is test.
  // TK_NULL already defined.
  TK_BOOL_T,     // Bool
  TK_NUM_T,      // Num
  TK_STRING_T,   // String
  TK_ARRAY_T,    // Array
  TK_MAP_T,      // Map
  TK_RANGE_T,    // Range
  TK_FUNC_T,     // Function
  TK_OBJ_T,      // Object (self, user data, etc.)

  TK_DO,         // do
  TK_WHILE,      // while
  TK_FOR,        // for
  TK_IF,         // if
  TK_ELIF,       // elif
  TK_ELSE,       // else
  TK_BREAK,      // break
  TK_CONTINUE,   // continue
  TK_RETURN,     // return

  TK_NAME,       // identifier

  TK_NUMBER,     // number literal
  TK_STRING,     // string literal

  /* String interpolation (reference wren-lang)
   * but it doesn't support recursive ex: "a \(b + "\(c)")"
   *  "a \(b) c \(d) e"
   * tokenized as:
   *   TK_STR_INTERP  "a "
   *   TK_NAME        b
   *   TK_STR_INTERP  " c "
   *   TK_NAME        d
   *   TK_STRING     " e" */
   // TK_STR_INTERP, //< not yet.

} TokenType;
|
||||
|
||||
// A single lexed token.
typedef struct {
  TokenType type;

  const char* start; //< Beginning of the token in the source.
  int length;        //< Number of chars of the token.
  int line;          //< Line number of the token (1 based).
  Var value;         //< Literal value of the token.
} Token;
|
||||
|
||||
// One entry of the keyword table: the keyword text, its length in chars and
// the token type it is lexed as.
typedef struct {
  const char* identifier;
  int length;
  TokenType tk_type;
} _Keyword;
|
||||
|
||||
// List of keywords mapped into their identifiers.
|
||||
static _Keyword _keywords[] = {
|
||||
//{ "type", 4, TK_TYPE },
|
||||
{ "import", 6, TK_IMPORT },
|
||||
{ "enum", 4, TK_ENUM },
|
||||
{ "def", 3, TK_DEF },
|
||||
{ "native", 6, TK_NATIVE },
|
||||
{ "end", 3, TK_END },
|
||||
{ "null", 4, TK_NULL },
|
||||
{ "self", 4, TK_SELF },
|
||||
{ "is", 2, TK_IS },
|
||||
{ "in", 2, TK_IN },
|
||||
{ "and", 3, TK_AND },
|
||||
{ "or", 2, TK_OR },
|
||||
{ "not", 3, TK_NOT },
|
||||
{ "true", 4, TK_TRUE },
|
||||
{ "false", 5, TK_FALSE },
|
||||
{ "do", 2, TK_DO },
|
||||
{ "while", 5, TK_WHILE },
|
||||
{ "for", 3, TK_FOR },
|
||||
{ "if", 2, TK_IF },
|
||||
{ "elif", 4, TK_ELIF },
|
||||
{ "else", 4, TK_ELSE },
|
||||
{ "break", 5, TK_BREAK },
|
||||
{ "continue", 8, TK_CONTINUE },
|
||||
{ "return", 6, TK_RETURN },
|
||||
|
||||
// Type names.
|
||||
{ "Bool", 4, TK_BOOL_T },
|
||||
{ "Num", 3, TK_NUM_T },
|
||||
{ "String", 6, TK_STRING_T },
|
||||
{ "Array", 5, TK_ARRAY_T },
|
||||
{ "Map", 3, TK_MAP_T },
|
||||
{ "Range", 5, TK_RANGE_T },
|
||||
{ "Object", 6, TK_OBJ_T },
|
||||
{ "Function", 8, TK_FUNC_T },
|
||||
|
||||
{ NULL, (TokenType)(0) }, // Sentinal to mark the end of the array
|
||||
};
|
||||
|
||||
// Lexer / parser state for one source string.
typedef struct {
  VM* vm;                   //< Owner of the parser (for reporting errors, etc).

  const char* source;       //< Currently compiled source.

  const char* token_start;  //< Start of the currently parsed token.
  const char* current_char; //< Current char position in the source.
  int current_line;         //< Line number of the current char.

  Token previous, current, next; //< Currently parsed tokens.

  bool has_errors;          //< True if any syntax error occurred at compile time.
} Parser;
|
||||
|
||||
// Compiler Types ////////////////////////////////////////////////////////////
|
||||
|
||||
// Precedence parsing references:
|
||||
// https://en.wikipedia.org/wiki/Shunting-yard_algorithm
|
||||
// TODO: I should explicitly state wren-lang as a reference "globally".
|
||||
|
||||
// Operator precedence levels, declared from the loosest binding (PREC_NONE)
// to the tightest (PREC_PRIMARY).
typedef enum {
  PREC_NONE,
  PREC_LOWEST,
  PREC_ASSIGNMENT,    // =
  PREC_LOGICAL_OR,    // or
  PREC_LOGICAL_AND,   // and
  PREC_LOGICAL_NOT,   // not
  PREC_EQUALITY,      // == !=
  PREC_IN,            // in
  PREC_IS,            // is
  PREC_COMPARISION,   // < > <= >=
  PREC_BITWISE_OR,    // |
  PREC_BITWISE_XOR,   // ^
  PREC_BITWISE_AND,   // &
  PREC_BITWISE_SHIFT, // << >>
  PREC_RANGE,         // ..
  PREC_TERM,          // + -
  PREC_FACTOR,        // * / %
  PREC_UNARY,         // - ! ~
  PREC_CALL,          // ()
  PREC_SUBSCRIPT,     // []
  PREC_ATTRIB,        // .index
  PREC_PRIMARY,
} Precedence;
|
||||
|
||||
// Signature of a function which compiles one grammar construct.
// NOTE(review): [can_assign] presumably tells whether an assignment is
// allowed at this position -- confirm against the callers.
typedef void (*GrammarFn)(Compiler* compiler, bool can_assign);

// Parsing rule of a token type: the functions used when the token appears in
// prefix and in infix position, and the precedence of the token.
typedef struct {
  GrammarFn prefix;
  GrammarFn infix;
  Precedence precedence;
} GrammarRule;
|
||||
|
||||
// A variable known to the compiler in the current context.
typedef struct {
  const char* name; //< Directly points into the source string.
  int length;       //< Length of the name.
  int depth;        //< The depth the local is defined in. (-1 means global)
} Variable;
|
||||
|
||||
// Compile time bookkeeping of the loop currently being compiled. Loops are
// chained through [outer_loop].
typedef struct sLoop {

  // Index of the loop's start instruction where the execution will jump
  // back to once it reaches the loop end.
  int start;

  // Index of the jump out address instruction to patch its value once done
  // compiling the loop.
  int exit_jump;

  // Index of the first body instruction. Needed to start patching jump
  // addresses from there till the loop end.
  int body;

  // The outer loop of the current loop used to set and reset the compiler's
  // current loop context.
  struct sLoop* outer_loop;

} Loop;
|
||||
|
||||
// State of one compilation: the parser plus the scope, variable, loop and
// function context the compiler is currently in.
struct Compiler {

  VM* vm;
  Parser parser;

  // Current depth the compiler is in: -1 means top level, 0 means function
  // level and > 0 is an inner scope.
  int scope_depth;

  Variable variables[MAX_VARIABLES]; //< Variables in the current context.
  int var_count;                     //< Number of locals in [variables].

  // TODO: compiler should mark Script* below not to be garbage collected.

  Script* script; //< Current script.
  Loop* loop;     //< Current loop.
  Function* fn;   //< Current function.
};
|
||||
|
||||
/*****************************************************************************
|
||||
* LEXING *
|
||||
*****************************************************************************/
|
||||
|
||||
// Forward declaration of lexer methods.
|
||||
|
||||
static char eatChar(Parser* parser);
|
||||
static void setNextValueToken(Parser* parser, TokenType type, Var value);
|
||||
static void setNextToken(Parser* parser, TokenType type);
|
||||
static bool matchChar(Parser* parser, char c);
|
||||
static bool matchLine(Parser* parser);
|
||||
|
||||
// Complete lexing a string literal. The opening '"' is already consumed by
// the caller; chars are collected (with the escapes \" \\ \n \r \t decoded)
// until the closing '"' and emitted as a TK_STRING value token. An unknown
// escape is dropped silently for now. If the source ends before the closing
// quote the cursor is left on the terminating '\0' so that the lexer can
// still produce TK_EOF.
static void eatString(Parser* parser) {
  ByteBuffer buff;
  byteBufferInit(&buff);

  while (true) {
    char c = eatChar(parser);

    if (c == '"') break;

    if (c == '\0') {
      // TODO: syntaxError()

      // Null byte is required by TK_EOF.
      parser->current_char--;
      break;
    }

    if (c == '\\') {
      switch (eatChar(parser)) {
        case '"': byteBufferWrite(&buff, parser->vm, '"'); break;
        case '\\': byteBufferWrite(&buff, parser->vm, '\\'); break;
        case 'n': byteBufferWrite(&buff, parser->vm, '\n'); break;
        case 'r': byteBufferWrite(&buff, parser->vm, '\r'); break;
        case 't': byteBufferWrite(&buff, parser->vm, '\t'); break;

        case '\0':
          // The source ended right after the backslash. Step back onto the
          // terminator so the next iteration takes the unterminated string
          // path above instead of reading past the end of the source.
          parser->current_char--;
          break;

        default:
          // TODO: syntaxError("Error: invalid escape character")
          break;
      }
    } else {
      byteBufferWrite(&buff, parser->vm, c);
    }
  }

  // The buffer isn't null terminated; the terminating '\0' is expected to
  // be added by newString().
  Var string = VAR_OBJ(&newString(parser->vm, (const char*)buff.data,
                       (uint32_t)buff.count)->_super);

  byteBufferClear(&buff, parser->vm);

  setNextValueToken(parser, TK_STRING, string);
}
|
||||
|
||||
// Returns the current char of the parser on.
|
||||
static char peekChar(Parser* parser) {
|
||||
return *parser->current_char;
|
||||
}
|
||||
|
||||
// Returns the next char of the parser on.
|
||||
static char peekNextChar(Parser* parser) {
|
||||
if (peekChar(parser) == '\0') return '\0';
|
||||
return *(parser->current_char + 1);
|
||||
}
|
||||
|
||||
// Advance the parser by 1 char.
|
||||
static char eatChar(Parser* parser) {
|
||||
char c = peekChar(parser);
|
||||
parser->current_char++;
|
||||
if (c == '\n') parser->current_line++;
|
||||
return c;
|
||||
}
|
||||
|
||||
// Complete lexing an identifier name.
|
||||
static void eatName(Parser* parser) {
|
||||
|
||||
char c = peekChar(parser);
|
||||
while (utilIsName(c) || utilIsDigit(c)) {
|
||||
eatChar(parser);
|
||||
c = peekChar(parser);
|
||||
}
|
||||
|
||||
const char* name_start = parser->token_start;
|
||||
|
||||
TokenType type = TK_NAME;
|
||||
|
||||
int length = (int)(parser->current_char - name_start);
|
||||
for (int i = 0; _keywords[i].identifier != NULL; i++) {
|
||||
if (_keywords[i].length == length &&
|
||||
strncmp(name_start, _keywords[i].identifier, length) == 0) {
|
||||
type = _keywords[i].tk_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
setNextToken(parser, type);
|
||||
}
|
||||
|
||||
// Complete lexing a number literal.
|
||||
static void eatNumber(Parser* parser) {
|
||||
|
||||
// TODO: hex, binary and scientific literals.
|
||||
|
||||
while (utilIsDigit(peekChar(parser)))
|
||||
eatChar(parser);
|
||||
|
||||
if (matchChar(parser, '.')) {
|
||||
while (utilIsDigit(peekChar(parser)))
|
||||
eatChar(parser);
|
||||
}
|
||||
|
||||
Var value = VAR_NUM(strtod(parser->token_start, NULL));
|
||||
setNextValueToken(parser, TK_NUMBER, value);
|
||||
}
|
||||
|
||||
// Read and ignore chars till it reach new line or EOF.
|
||||
static void skipLineComment(Parser* parser) {
|
||||
char c = eatChar(parser);
|
||||
|
||||
while (c != '\n' && c != '\0') {
|
||||
c = eatChar(parser);
|
||||
}
|
||||
}
|
||||
|
||||
// Will skip multiple new lines.
|
||||
static void skipNewLines(Parser* parser) {
|
||||
matchLine(parser);
|
||||
}
|
||||
|
||||
// If the current char is [c] consume it and advance char by 1 and returns
|
||||
// true otherwise returns false.
|
||||
static bool matchChar(Parser* parser, char c) {
|
||||
if (peekChar(parser) != c) return false;
|
||||
eatChar(parser);
|
||||
return true;
|
||||
}
|
||||
|
||||
// If the current char is [c] eat the char and add token two otherwise eat
|
||||
// append token one.
|
||||
static void setNextTwoCharToken(Parser* parser, char c, TokenType one,
|
||||
TokenType two) {
|
||||
if (matchChar(parser, c)) {
|
||||
setNextToken(parser, two);
|
||||
} else {
|
||||
setNextToken(parser, one);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the next token as the type.
|
||||
static void setNextToken(Parser* parser, TokenType type) {
|
||||
parser->next.type = type;
|
||||
parser->next.start = parser->token_start;
|
||||
parser->next.length = (int)(parser->current_char - parser->token_start);
|
||||
parser->next.line = parser->current_line - ((type == TK_LINE) ? 1 : 0);
|
||||
}
|
||||
|
||||
// Initialize the next token as the type and assign the value.
|
||||
static void setNextValueToken(Parser* parser, TokenType type, Var value) {
|
||||
setNextToken(parser, type);
|
||||
parser->next.value = value;
|
||||
}
|
||||
|
||||
// Lex the next token and set it as the next token. The token window is
// shifted first: current becomes previous and next becomes current. Once the
// current token is TK_EOF nothing more is lexed.
static void lexToken(Parser* parser) {
  parser->previous = parser->current;
  parser->current = parser->next;

  if (parser->current.type == TK_EOF) return;

  while (peekChar(parser) != '\0') {
    parser->token_start = parser->current_char;
    char c = eatChar(parser);

    switch (c) {
      // Single char tokens.
      case ',': setNextToken(parser, TK_COMMA); return;
      case ':': setNextToken(parser, TK_COLLON); return;
      case ';': setNextToken(parser, TK_SEMICOLLON); return;
      case '#': setNextToken(parser, TK_HASH); return;
      case '(': setNextToken(parser, TK_LPARAN); return;
      case ')': setNextToken(parser, TK_RPARAN); return;
      case '[': setNextToken(parser, TK_LBRACKET); return;
      case ']': setNextToken(parser, TK_RBRACKET); return;
      case '{': setNextToken(parser, TK_LBRACE); return;
      case '}': setNextToken(parser, TK_RBRACE); return;
      case '%': setNextToken(parser, TK_PERCENT); return;

      case '~': setNextToken(parser, TK_TILD); return;
      case '&': setNextToken(parser, TK_AMP); return;
      case '|': setNextToken(parser, TK_PIPE); return;
      case '^': setNextToken(parser, TK_CARET); return;

      case '\n': setNextToken(parser, TK_LINE); return;

      // White space produces no token: the rest of the run is skipped and
      // the `break` only leaves the switch, so the loop lexes the next char.
      case ' ':
      case '\t':
      case '\r': {
        char c = peekChar(parser);
        while (c == ' ' || c == '\t' || c == '\r') {
          eatChar(parser);
          c = peekChar(parser);
        }
        break;
      }

      case '.': // TODO: ".5" should be a valid number.
        setNextTwoCharToken(parser, '.', TK_DOT, TK_DOTDOT);
        return;

      case '=':
        setNextTwoCharToken(parser, '=', TK_EQ, TK_EQEQ);
        return;

      // A lone '!' is lexed as TK_NOT.
      case '!':
        setNextTwoCharToken(parser, '=', TK_NOT, TK_NOTEQ);
        return;

      case '>':
        if (matchChar(parser, '>'))
          setNextToken(parser, TK_SRIGHT);
        else
          setNextTwoCharToken(parser, '=', TK_GT, TK_GTEQ);
        return;

      case '<':
        if (matchChar(parser, '<'))
          setNextToken(parser, TK_SLEFT);
        else
          setNextTwoCharToken(parser, '=', TK_LT, TK_LTEQ);
        return;

      case '+':
        setNextTwoCharToken(parser, '=', TK_PLUS, TK_PLUSEQ);
        return;

      case '-':
        setNextTwoCharToken(parser, '=', TK_MINUS, TK_MINUSEQ);
        return;

      case '*':
        setNextTwoCharToken(parser, '=', TK_STAR, TK_STAREQ);
        return;

      case '/':
        setNextTwoCharToken(parser, '=', TK_FSLASH, TK_DIVEQ);
        return;

      case '"': eatString(parser); return;

      // Numbers, names / keywords, and everything else (an error token).
      default: {

        if (utilIsDigit(c)) {
          eatNumber(parser);
        } else if (utilIsName(c)) {
          eatName(parser);
        } else {
          if (c >= 32 && c <= 126) {
            // TODO: syntaxError("Invalid character %c", c);
          } else {
            // TODO: syntaxError("Invalid byte 0x%x", (uint8_t)c);
          }
          setNextToken(parser, TK_ERROR);
        }
        return;
      }
    }
  }

  // End of the source reached: the EOF token starts at the cursor.
  setNextToken(parser, TK_EOF);
  parser->next.start = parser->current_char;
}
|
||||
|
||||
/*****************************************************************************
|
||||
* PARSING *
|
||||
*****************************************************************************/
|
||||
|
||||
// Initialize the parser.
|
||||
static void parserInit(Parser* self, VM* vm, const char* source) {

  // Lexer state: start reading at the first character of the first line.
  self->source = source;
  self->current_char = source;
  self->token_start = source;
  self->current_line = 1;

  // Owner VM and error flag.
  self->vm = vm;
  self->has_errors = false;

  // No token has been lexed yet, so [next] starts out as an empty error token.
  self->next.value = VAR_UNDEFINED;
  self->next.line = 1;
  self->next.length = 0;
  self->next.start = NULL;
  self->next.type = TK_ERROR;
}
|
||||
|
||||
// Returns current token type.
|
||||
static TokenType peek(Parser* self) {
  // The token the parser is currently looking at.
  const TokenType current_type = self->current.type;
  return current_type;
}
|
||||
|
||||
// Returns next token type.
|
||||
static TokenType peekNext(Parser* self) {
  // The look-ahead token, one past the current one.
  const TokenType next_type = self->next.type;
  return next_type;
}
|
||||
|
||||
// Consume the current token if it's the expected one, lex the next token and
// return true; otherwise return false. It skips all the new lines in between,
// thus matching TK_LINE is invalid.
|
||||
static bool match(Parser* self, TokenType expected) {
  ASSERT(expected != TK_LINE, "Can't match TK_LINE.");

  // New lines in front of the token are not significant here.
  matchLine(self);

  if (peek(self) == expected) {
    lexToken(self);
    return true;
  }
  return false;
}
|
||||
|
||||
// Match one or more lines and return true if there any.
|
||||
static bool matchLine(Parser* parser) {
  // Eat every consecutive TK_LINE and remember whether there was at least one.
  bool consumed_any = false;
  while (peek(parser) == TK_LINE) {
    lexToken(parser);
    consumed_any = true;
  }
  return consumed_any;
}
|
||||
|
||||
// Match an optional semicolon followed by any number of new lines.
|
||||
static void matchEndStatement(Parser* parser) {

  // The semicolon must be on the same line as the statement. The current
  // token is already known to be the semicolon, so it can be consumed
  // directly without skipping any new line first.
  if (peek(parser) == TK_SEMICOLLON) {
    lexToken(parser);
  }

  // Then swallow the trailing new lines, if any.
  matchLine(parser);
}
|
||||
|
||||
// Match optional "do" keyword and new lines.
|
||||
static void matchStartBlock(Parser* parser) {

  // "do" must be on the same line as the block header. The current token is
  // already known to be "do", so it can be consumed directly without
  // skipping any new line first.
  if (peek(parser) == TK_DO) {
    lexToken(parser);
  }

  // Then swallow the new lines in front of the block body, if any.
  matchLine(parser);
}
|
||||
|
||||
// Consume the current token and, if it's not [expected], emit an error log
// and continue parsing for more error logs. It skips all the new lines in
// between, thus matching TK_LINE is invalid.
|
||||
static void consume(Parser* self, TokenType expected, const char* err_msg) {
  ASSERT(expected != TK_LINE, "Can't match TK_LINE.");

  // Skip leading new lines, then take the token unconditionally.
  matchLine(self);
  lexToken(self);

  // The happy path: the token just consumed is the one we wanted.
  if (self->previous.type == expected) return;

  // TODO: syntaxError(err_msg);

  // If the next token is the expected one, discard the unexpected token to
  // minimize cascaded errors and continue parsing.
  if (peek(self) == expected) lexToken(self);
}
|
||||
|
||||
/*****************************************************************************
|
||||
* PARSING GRAMMAR *
|
||||
*****************************************************************************/
|
||||
|
||||
// Forward declaration of grammar functions.
|
||||
|
||||
static void exprAssignment(Compiler* compiler, bool can_assign);
|
||||
|
||||
// Bool, Num, String, Null, -and- bool_t, Array_t, String_t, ...
|
||||
static void exprLiteral(Compiler* compiler, bool can_assign);
|
||||
static void exprName(Compiler* compiler, bool can_assign);
|
||||
|
||||
|
||||
static void exprBinaryOp(Compiler* compiler, bool can_assign);
|
||||
static void exprUnaryOp(Compiler* compiler, bool can_assign);
|
||||
|
||||
static void exprGrouping(Compiler* compiler, bool can_assign);
|
||||
static void exprArray(Compiler* compiler, bool can_assign);
|
||||
static void exprMap(Compiler* compiler, bool can_assign);
|
||||
|
||||
static void exprCall(Compiler* compiler, bool can_assign);
|
||||
static void exprAttrib(Compiler* compiler, bool can_assign);
|
||||
static void exprSubscript(Compiler* compiler, bool can_assign);
|
||||
|
||||
#define NO_RULE { NULL, NULL, PREC_NONE }
|
||||
#define NO_INFIX PREC_NONE
|
||||
|
||||
GrammarRule rules[] = { // Prefix Infix Infix Precedence
|
||||
/* TK_ERROR */ NO_RULE,
|
||||
/* TK_EOF */ NO_RULE,
|
||||
/* TK_LINE */ NO_RULE,
|
||||
/* TK_DOT */ { exprAttrib, NULL, PREC_ATTRIB },
|
||||
/* TK_DOTDOT */ { NULL, exprBinaryOp, PREC_RANGE },
|
||||
/* TK_COMMA */ NO_RULE,
|
||||
/* TK_COLLON */ NO_RULE,
|
||||
/* TK_SEMICOLLON */ NO_RULE,
|
||||
/* TK_HASH */ NO_RULE,
|
||||
/* TK_LPARAN */ { exprGrouping, exprCall, PREC_CALL },
|
||||
/* TK_RPARAN */ NO_RULE,
|
||||
/* TK_LBRACKET */ { exprArray, exprSubscript, PREC_SUBSCRIPT },
|
||||
/* TK_RBRACKET */ NO_RULE,
|
||||
/* TK_LBRACE */ { exprMap, NULL, NO_INFIX },
|
||||
/* TK_RBRACE */ NO_RULE,
|
||||
/* TK_PERCENT */ { NULL, exprBinaryOp, PREC_FACTOR },
|
||||
/* TK_TILD */ { exprUnaryOp, NULL, NO_INFIX },
|
||||
/* TK_AMP */ { NULL, exprBinaryOp, PREC_BITWISE_AND },
|
||||
/* TK_PIPE */ { NULL, exprBinaryOp, PREC_BITWISE_OR },
|
||||
/* TK_CARET */ { NULL, exprBinaryOp, PREC_BITWISE_XOR },
|
||||
/* TK_PLUS */ { NULL, exprBinaryOp, PREC_TERM },
|
||||
/* TK_MINUS */ { NULL, exprBinaryOp, PREC_TERM },
|
||||
/* TK_STAR */ { NULL, exprBinaryOp, PREC_FACTOR },
|
||||
/* TK_FSLASH */ { NULL, exprBinaryOp, PREC_FACTOR },
|
||||
/* TK_BSLASH */ NO_RULE,
|
||||
/* TK_EQ */ { NULL, exprAssignment, PREC_ASSIGNMENT },
|
||||
/* TK_GT */ { NULL, exprBinaryOp, PREC_COMPARISION },
|
||||
/* TK_LT */ { NULL, exprBinaryOp, PREC_COMPARISION },
|
||||
/* TK_EQEQ */ { NULL, exprBinaryOp, PREC_EQUALITY },
|
||||
/* TK_NOTEQ */ { NULL, exprBinaryOp, PREC_EQUALITY },
|
||||
/* TK_GTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION },
|
||||
/* TK_LTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION },
|
||||
/* TK_PLUSEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT },
|
||||
/* TK_MINUSEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT },
|
||||
/* TK_STAREQ */ { NULL, exprAssignment, PREC_ASSIGNMENT },
|
||||
/* TK_DIVEQ */ { NULL, exprAssignment, PREC_ASSIGNMENT },
|
||||
/* TK_SRIGHT */ { NULL, exprBinaryOp, PREC_BITWISE_SHIFT },
|
||||
/* TK_SLEFT */ { NULL, exprBinaryOp, PREC_BITWISE_SHIFT },
|
||||
/* TK_IMPORT */ NO_RULE,
|
||||
/* TK_ENUM */ NO_RULE,
|
||||
/* TK_DEF */ NO_RULE,
|
||||
/* TK_EXTERN */ NO_RULE,
|
||||
/* TK_END */ NO_RULE,
|
||||
/* TK_NULL */ NO_RULE,
|
||||
/* TK_SELF */ NO_RULE,
|
||||
/* TK_IS */ { NULL, exprBinaryOp, PREC_IS },
|
||||
/* TK_IN */ { NULL, exprBinaryOp, PREC_IN },
|
||||
/* TK_AND */ { NULL, exprBinaryOp, PREC_LOGICAL_AND },
|
||||
/* TK_OR */ { NULL, exprBinaryOp, PREC_LOGICAL_OR },
|
||||
/* TK_NOT */ { NULL, exprUnaryOp, PREC_LOGICAL_NOT },
|
||||
/* TK_TRUE */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_FALSE */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_BOOL_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_NUM_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_STRING_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_ARRAY_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_MAP_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_RANGE_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_FUNC_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_OBJ_T */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_DO */ NO_RULE,
|
||||
/* TK_WHILE */ NO_RULE,
|
||||
/* TK_FOR */ NO_RULE,
|
||||
/* TK_IF */ NO_RULE,
|
||||
/* TK_ELIF */ NO_RULE,
|
||||
/* TK_ELSE */ NO_RULE,
|
||||
/* TK_BREAK */ NO_RULE,
|
||||
/* TK_CONTINUE */ NO_RULE,
|
||||
/* TK_RETURN */ NO_RULE,
|
||||
/* TK_NAME */ { exprName, NULL, NO_INFIX },
|
||||
/* TK_NUMBER */ { exprLiteral, NULL, NO_INFIX },
|
||||
/* TK_STRING */ { exprLiteral, NULL, NO_INFIX },
|
||||
};
|
||||
|
||||
// Returns the grammar rule registered for the token [type].
static GrammarRule* getRule(TokenType type) {
  const int index = (int)type;
  return rules + index;
}
|
||||
|
||||
// Grammar functions. None of them is implemented yet; the casts only mark
// the parameters as intentionally unused.

static void exprAssignment(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprLiteral(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprName(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprBinaryOp(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprUnaryOp(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprGrouping(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprArray(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprMap(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprCall(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprAttrib(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}

static void exprSubscript(Compiler* compiler, bool can_assign) {
  /*TODO*/
  (void)compiler; (void)can_assign;
}
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* COMPILING *
|
||||
*****************************************************************************/
|
||||
|
||||
// Used in searching for local variables.
|
||||
typedef enum {
  SCOPE_ANY     = -3,  //< Don't restrict the search to a scope.
  SCOPE_CURRENT = -2,  //< Only match variables of the current scope depth.
} ScopeType;
|
||||
|
||||
// Result type for an identifier definition.
|
||||
typedef enum {
  NAME_NOT_DEFINED = 0,
  NAME_LOCAL_VAR   = 1,  //< Including parameter.
  NAME_GLOBAL_VAR  = 2,
  NAME_SCRIPT_FN   = 3,
} NameDefnType;
|
||||
|
||||
// Identifier search result.
|
||||
typedef struct {

  // What kind of definition the name resolved to, or NAME_NOT_DEFINED.
  NameDefnType type;

  // Could be found in one of the imported script or in it's imported script
  // recursively. If true [_extern] will be the script ID.
  bool is_extern;

  // Extern script's ID.
  ID _extern;

  // Index of the definition; which member applies presumably follows [type]
  // (local / global / function) -- TODO confirm once the search is written.
  union {
    int local;
    int global;
    int func;
  } index;

} NameSearchResult;
|
||||
|
||||
// Initialize the compiler (and its parser) to compile [source] for [vm].
// The script being compiled is assigned by the caller afterwards.
static void compilerInit(Compiler* compiler, VM* vm, const char* source) {
  parserInit(&compiler->parser, vm, source);
  compiler->vm = vm;
  compiler->scope_depth = -1;  // Incremented when a function scope opens.
  compiler->var_count = 0;

  // These were previously declared as unused local variables, which left the
  // compiler's own members uninitialized.
  // NOTE(review): assumes Compiler declares a [loop] member next to [fn].
  compiler->loop = NULL;
  compiler->fn = NULL;
}
|
||||
|
||||
// Search for the name through compiler's variables. Returns -1 if not found.
|
||||
static int compilerSearchVariables(Compiler* compiler, const char* name,
                                   int length, ScopeType scope) {

  // With SCOPE_CURRENT only variables declared at the compiler's current
  // scope depth are candidates; any other scope value checks all of them.
  const bool current_only = (scope == SCOPE_CURRENT);

  int index;
  for (index = 0; index < compiler->var_count; index++) {
    Variable* candidate = compiler->variables + index;

    if (current_only && candidate->depth != compiler->scope_depth) continue;
    if (candidate->length != length) continue;
    if (strncmp(candidate->name, name, length) != 0) continue;

    return index;  // First match wins.
  }

  return -1;
}
|
||||
|
||||
// Will check if the name already defined.
|
||||
static NameSearchResult compilerSearchName(Compiler* compiler,
                                           const char* name, int length) {
  // TODO: the search isn't implemented yet, every name is reported as
  // undefined.

  // Initializing the first member zero-initializes the rest of the struct,
  // so callers never copy or read indeterminate values.
  NameSearchResult result = { NAME_NOT_DEFINED };

  (void)compiler; (void)name; (void)length;
  return result;
}
|
||||
|
||||
// Add a variable and return it's index to the context. Assumes that the
|
||||
// variable name is unique and not defined before in the current scope.
|
||||
static int compilerAddVariable(Compiler* compiler, const char* name,
                               int length) {
  // The new variable takes the first free slot and belongs to the scope the
  // compiler is currently in.
  const int index = compiler->var_count;
  Variable* slot = &compiler->variables[index];

  slot->depth = compiler->scope_depth;
  slot->length = length;
  slot->name = name;

  compiler->var_count = index + 1;
  return index;
}
|
||||
|
||||
// Compile a function definition: its name and parameter list (the body isn't
// compiled yet). If [is_native] only the signature is expected. The function
// is registered in the script's function name table and function buffer.
static void compileFunction(Compiler* compiler, bool is_native) {

  Parser* parser = &compiler->parser;

  consume(parser, TK_NAME, "Expected a function name.");

  const char* name_start = parser->previous.start;
  int name_length = parser->previous.length;
  NameSearchResult result = compilerSearchName(compiler, name_start,
                                               name_length);

  if (result.type != NAME_NOT_DEFINED) {
    // TODO: multiple definition error();
  }

  int index = nameTableAdd(&compiler->script->function_names, compiler->vm,
                           name_start, name_length);

  Function* func = newFunction(compiler->vm, nameTableGet(
    &compiler->script->function_names, index), compiler->script, is_native);

  // Keep the function referenced while the buffer write may allocate.
  vmPushTempRef(compiler->vm, &func->_super);
  functionBufferWrite(&compiler->script->functions, compiler->vm, func);
  vmPopTempRef(compiler->vm);

  compiler->fn = func;

  consume(parser, TK_LPARAN, "Expected '(' after function name.");

  compiler->scope_depth++; // Parameter scope.

  // Everything declared from here on belongs to this function and is
  // dropped again when the function ends.
  const int outer_var_count = compiler->var_count;

  // Compile parameter list.
  while (match(parser, TK_NAME)) {
    int predef = compilerSearchVariables(compiler, parser->previous.start,
                                         parser->previous.length,
                                         SCOPE_CURRENT);
    if (predef != -1) {
      // TODO: error("Multiple definition of a parameter");
    } else {
      // Record the parameter, otherwise the duplicate check above could
      // never find anything.
      compilerAddVariable(compiler, parser->previous.start,
                          parser->previous.length);
    }
    match(parser, TK_COMMA);
  }

  consume(parser, TK_RPARAN, "Expected ')' after parameters end.");
  matchEndStatement(parser);

  if (!is_native) {
    // TODO: Compile body.
  }

  // Leave the parameter scope.
  compiler->var_count = outer_var_count;
  compiler->scope_depth--;
  compiler->fn = NULL;
}
|
||||
|
||||
// Compile [source] into a new script owned by [vm] and return it. Only the
// top level function definitions are handled so far.
Script* compileSource(VM* vm, const char* source) {

  // Skip utf8 BOM if there is any.
  if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;

  Compiler compiler;
  compilerInit(&compiler, vm, source);

  Script* script = newScript(vm);
  compiler.script = script;

  // Parser pointer for quick access.
  Parser* parser = &compiler.parser;

  // Lex initial tokens. current <-- next.
  lexToken(parser);
  lexToken(parser);
  skipNewLines(parser);

  while (!match(parser, TK_EOF)) {

    if (match(parser, TK_NATIVE)) {
      compileFunction(&compiler, true);

    } else if (match(parser, TK_DEF)) {
      compileFunction(&compiler, false);

    } else if (match(parser, TK_IMPORT)) {
      // TODO:

    } else {
      // TODO:
      // name = value # Variable defn.
      // name() # statement

      // Until statements are compiled, skip the token. Without consuming
      // anything here the loop would never reach TK_EOF.
      lexToken(parser);
    }
  }

  // The function is declared to return the compiled script; falling off the
  // end would hand an indeterminate pointer to the caller.
  return script;
}
|
17
src/compiler.h
Normal file
17
src/compiler.h
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef COMPILER_H
|
||||
#define COMPILER_H
|
||||
|
||||
#include "common.h"
|
||||
#include "var.h"
|
||||
|
||||
typedef struct Compiler Compiler;
|
||||
|
||||
Script* compileSource(VM* vm, const char* source);
|
||||
|
||||
|
||||
#endif // COMPILER_H
|
6
src/types/.gitignore
vendored
Normal file
6
src/types/.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
|
||||
## Ignore all generated source files.
|
||||
*.gen.h
|
||||
*.gen.c
|
||||
gen/
|
||||
*.bat
|
44
src/types/buffer.template.c
Normal file
44
src/types/buffer.template.c
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
/** A template header to emulate C++ template and every occurence of
|
||||
* $name$ will be replaced by the name of the buffer and $type$ will be
|
||||
* replaced by the element type of the buffer (by a pre compile script) */
|
||||
|
||||
// Replace the following line with "$name$_buffer.h"
|
||||
#include "buffer.template.h"
|
||||
#include "../utils.h"
|
||||
#include "../vm.h"
|
||||
|
||||
void $name_l$BufferInit($name$Buffer* self) {
  // An empty buffer owns no storage at all.
  self->capacity = 0;
  self->count = 0;
  self->data = NULL;
}
|
||||
|
||||
void $name_l$BufferClear($name$Buffer* self, VM* vm) {
  // Hand the storage back to the VM (new size 0) ...
  const size_t old_size = self->capacity * sizeof($type$);
  vmRealloc(vm, self->data, old_size, 0);

  // ... and return to the freshly initialized, empty state.
  $name_l$BufferInit(self);
}
|
||||
|
||||
void $name_l$BufferFill($name$Buffer* self, VM* vm, $type$ data, int count) {

  // Grow the storage to the next power of two when the new elements
  // wouldn't fit.
  if (self->count + count > self->capacity) {
    const int new_capacity = utilPowerOf2Ceil((int)self->count + count);
    const size_t old_size = self->capacity * sizeof($type$);
    const size_t new_size = new_capacity * sizeof($type$);

    self->data = ($type$*)vmRealloc(vm, self->data, old_size, new_size);
    self->capacity = new_capacity;
  }

  // Append [count] copies of [data] at the end.
  for (int remaining = count; remaining > 0; remaining--) {
    self->data[self->count++] = data;
  }
}
|
||||
|
||||
void $name_l$BufferWrite($name$Buffer* self, VM* vm, $type$ data) {
  // Writing is just filling with a single copy of the element.
  const int single = 1;
  $name_l$BufferFill(self, vm, data, single);
}
|
45
src/types/buffer.template.h
Normal file
45
src/types/buffer.template.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
/** A template header to emulate C++ template and every occurence of
|
||||
* $name$ will be replaced by the name of the buffer and $type$ will be
|
||||
* replaced by the element type of the buffer (by a pre compile script) */
|
||||
|
||||
#ifndef $name_u$_BUFFER_H
|
||||
#define $name_u$_BUFFER_H
|
||||
|
||||
#include "../common.h"
|
||||
#include "miniscript.h"
|
||||
|
||||
// The factor by which the buffer will grow when it's capacity reached.
|
||||
#define GROW_FACTOR 2
|
||||
|
||||
// The initial capacity of the buffer.
|
||||
#define MIN_CAPACITY 16
|
||||
|
||||
// A place holder typedef to prevent IDE syntax errors. Remove this line
|
||||
// when generating the source.
|
||||
typedef uint8_t $type$;
|
||||
|
||||
typedef struct {
|
||||
$type$* data;
|
||||
size_t count;
|
||||
size_t capacity;
|
||||
} $name$Buffer;
|
||||
|
||||
// Initialize a new, empty buffer instance.
|
||||
void $name_l$BufferInit($name$Buffer* self);
|
||||
|
||||
// Frees the allocated elements using the VM's realloc function.
|
||||
void $name_l$BufferClear($name$Buffer* self, VM* vm);
|
||||
|
||||
// Fill the buffer at the end of it with provided data if the capacity isn't
|
||||
// enough using VM's realloc function.
|
||||
void $name_l$BufferFill($name$Buffer* self, VM* vm, $type$ data, int count);
|
||||
|
||||
// Write to the buffer with provided data at the end of the buffer.
|
||||
void $name_l$BufferWrite($name$Buffer* self, VM* vm, $type$ data);
|
||||
|
||||
#endif // $name_u$_BUFFER_H
|
101
src/types/buffergen.py
Normal file
101
src/types/buffergen.py
Normal file
@ -0,0 +1,101 @@
|
||||
from pathlib import Path ## python 3.4
|
||||
import shutil
|
||||
import os, sys
|
||||
|
||||
## usage buffergen.py [--clean]
|
||||
|
||||
## Absolute location of this script and the directory it lives in; the
## templates and the generated files are looked up relative to ROOT.
SCRIPT_PATH = Path(os.path.realpath(__file__))
ROOT = str(SCRIPT_PATH.parent)

## One (buffer name, element type) pair per buffer to generate.
GEN_LIST = [
    ('Int',      'int'),
    ('Byte',     'uint8_t'),
    ('Var',      'Var'),
    ('String',   'String*'),
    ('Function', 'Function*'),
]
|
||||
|
||||
def log(msg):
    """Print *msg* on stdout, prefixed with the name of this script."""
    print('[buffergen.py] %s' % (msg,))
|
||||
|
||||
def gen():
    """Generate the buffer sources from the script's own directory.

    The working directory is switched to ROOT for the duration of the
    generation and always restored afterwards, even if generation fails.
    Returns 0.
    """
    cwd = os.getcwd()
    os.chdir(ROOT)
    try:
        _gen()
    finally:
        ## Don't leave the caller stranded in ROOT when _gen() raises.
        os.chdir(cwd)
    return 0
|
||||
|
||||
def clean():
    """Remove the generated buffer sources from the script's own directory.

    The working directory is switched to ROOT for the duration of the
    cleanup and always restored afterwards, even if the cleanup fails.
    Returns 0.
    """
    cwd = os.getcwd()
    os.chdir(ROOT)
    try:
        _clean()
    finally:
        ## Don't leave the caller stranded in ROOT when _clean() raises.
        os.chdir(cwd)
    return 0
|
||||
|
||||
def _replace(text, _data):
|
||||
text = text.replace('$name$', _data[0])
|
||||
text = text.replace('$name_l$', _data[0].lower())
|
||||
text = text.replace('$name_u$', _data[0].upper())
|
||||
text = text.replace('$type$', _data[1])
|
||||
|
||||
## Fix relative imports.
|
||||
text = text.replace('../vm.h', '../../vm.h')
|
||||
text = text.replace('../utils.h', '../../utils.h')
|
||||
text = text.replace('../common.h', '../../common.h')
|
||||
|
||||
return text
|
||||
|
||||
def _gen():
    """Generate gen/<name>_buffer.h and gen/<name>_buffer.c for GEN_LIST.

    Reads 'buffer.template.h' and 'buffer.template.c' from the current
    working directory and writes the results below 'gen/' in it.
    """
    with open('buffer.template.h', 'r') as f:
        header = f.read()
    with open('buffer.template.c', 'r') as f:
        source = f.read()

    ## Template lines that must not end up in the generated files as they
    ## are: the typedef is dropped, the include is redirected.
    placeholder_typedef = (
        '// A place holder typedef to prevent IDE syntax errors. '
        'Remove this line\n'
        '// when generating the source.\n'
        'typedef uint8_t $type$;\n'
    )
    placeholder_include = (
        '// Replace the following line with "$name$_buffer.h"\n'
        '#include "buffer.template.h"'
    )

    ## Create the output directory once, up front. exist_ok avoids the
    ## check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs('gen', exist_ok=True)

    for _data in GEN_LIST:
        name_l = _data[0].lower()

        _header = _replace(header.replace(placeholder_typedef, ''), _data)
        _source = _replace(
            source.replace(placeholder_include,
                           '#include "%s_buffer.h"' % name_l),
            _data)

        for ext, text in (('.h', _header), ('.c', _source)):
            filename = name_l + '_buffer' + ext
            with open('gen/' + filename, 'w') as f:
                f.write(text)
            log(filename + ' generated')
|
||||
|
||||
|
||||
def _clean():
    """Remove the 'gen/' directory below the current working directory.

    A missing directory is not an error: there is simply nothing to clean.
    """
    try:
        shutil.rmtree('gen/')
    except FileNotFoundError:
        log("Nothing to clean")
        return
    log("Buffer source files removed")
|
||||
|
||||
|
||||
def error_exit(msg):
    """Report a usage error on stderr and terminate with exit status 1."""
    print("Error: %s\n\tusage buffergen.py [--clean]" % msg, file=sys.stderr)
    ## sys.exit() is always available; the bare exit() helper is injected by
    ## the site module and is meant for interactive use only.
    sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
    ## gen() / clean() switch to the script's directory first, so the script
    ## works no matter which directory it is started from.
    if len(sys.argv) > 2:
        error_exit("invalid arg count")
    if len(sys.argv) == 2:
        if sys.argv[1] == '--clean':
            clean()
        else:
            error_exit("unknown argument")
    else:
        gen()
    sys.exit(0)
|
31
src/types/name_table.c
Normal file
31
src/types/name_table.c
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include "name_table.h"
|
||||
#include "../var.h"
|
||||
#include "../vm.h"
|
||||
|
||||
void nameTableInit(NameTable* self) {
  // A name table is a plain string buffer underneath.
  stringBufferInit(self);
}
|
||||
|
||||
void nameTableClear(NameTable* self, VM* vm) {
  // Delegates to the underlying string buffer.
  stringBufferClear(self, vm);
}
|
||||
|
||||
int nameTableAdd(NameTable* self, VM* vm, const char* name, size_t length) {
  String* entry = newString(vm, name, (uint32_t)length);
  int index;

  // Keep the new string referenced while the buffer write may allocate.
  vmPushTempRef(vm, &entry->_super);
  stringBufferWrite(self, vm, entry);
  vmPopTempRef(vm);

  // The name was appended, so it sits in the last slot.
  index = (int)(self->count - 1);
  return index;
}
|
||||
|
||||
const char* nameTableGet(NameTable* self, int index) {
  String* entry;
  ASSERT(0 <= index && index < self->count, "Index out of bounds.");

  // Hand out the characters of the stored string object.
  entry = self->data[index];
  return entry->data;
}
|
27
src/types/name_table.h
Normal file
27
src/types/name_table.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef SYMBOL_TABLE_H
|
||||
#define SYMBOL_TABLE_H
|
||||
|
||||
// Symbol table maps the names to their member indices in the VarBuffer.
|
||||
#include "gen/string_buffer.h"
|
||||
|
||||
// TODO: Change this to use Map.
|
||||
typedef StringBuffer NameTable;
|
||||
|
||||
// Initialize the symbol table.
|
||||
void nameTableInit(NameTable* self);
|
||||
|
||||
// Remove the elements of the symbol table.
|
||||
void nameTableClear(NameTable* self, VM* vm);
|
||||
|
||||
// Add a name to the name table and return the index of the name in the table.
|
||||
int nameTableAdd(NameTable* self, VM* vm, const char* name, size_t length);
|
||||
|
||||
// Return name at index.
|
||||
const char* nameTableGet(NameTable* self, int index);
|
||||
|
||||
#endif // SYMBOL_TABLE_H
|
144
src/utils.c
Normal file
144
src/utils.c
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
int utilPowerOf2Ceil(int n) {
  int shift;

  // Smear the highest set bit of (n - 1) into every lower position, then
  // add one to land on the power of two.
  n--;
  for (shift = 1; shift <= 16; shift <<= 1) {
    n |= n >> shift;
  }

  return n + 1;
}
|
||||
|
||||
bool utilIsName(char c) {
  if (c == '_') return true;
  if ('A' <= c && c <= 'Z') return true;
  return 'a' <= c && c <= 'z';
}
|
||||
|
||||
bool utilIsDigit(char c) {
  // Characters below '0' wrap around to a huge unsigned value, so a single
  // comparison covers both ends of the range.
  return (unsigned)(c - '0') < 10u;
}
|
||||
|
||||
/****************************************************************************
|
||||
* UTF8 *
|
||||
****************************************************************************/
|
||||
|
||||
#define B1(first) 0b##first
|
||||
#define B2(first, last) 0b##first##last
|
||||
#define B3(first, second, last) 0b##first##second##last
|
||||
#define B4(first, second, third, last) 0b##first##second##third##last
|
||||
|
||||
// Returns the number of bytes needed to encode the code point [value] in
// utf8, or 0 if it can't be encoded (negative or above 0x10ffff).
int utf8_encodeBytesCount(int value) {
  // A negative value isn't a code point; without this check it would be
  // reported as a single byte character.
  if (value < 0) return 0;

  if (value <= 0x7f) return 1;
  if (value <= 0x7ff) return 2;
  if (value <= 0xffff) return 3;
  if (value <= 0x10ffff) return 4;

  // If we're here the value is beyond the range utf8 is limited to.
  return 0;
}
|
||||
|
||||
int utf8_decodeBytesCount(uint8_t byte) {

  // Leading bytes of multi byte characters: 110xxxxx, 1110xxxx, 11110xxx.
  if ((byte & 0xE0) == 0xC0) return 2;
  if ((byte & 0xF0) == 0xE0) return 3;
  if ((byte & 0xF8) == 0xF0) return 4;

  // Everything else counts as a single byte: ASCII (0xxxxxxx), a
  // continuation byte (10xxxxxx) or an invalid utf8 byte.
  return 1;
}
|
||||
|
||||
// Encodes the code point [value] as utf8 into [bytes], which must have room
// for 4 bytes. Returns the number of bytes written, or 0 (writing nothing)
// if the value can't be encoded (negative or above 0x10ffff).
int utf8_encodeValue(int value, uint8_t* bytes) {

  // A negative value isn't a code point; without this check its low 7 bits
  // would be written out as if it was an ASCII character.
  if (value < 0) return 0;

  // 1 byte character 0xxxxxxx -> the value itself.
  if (value <= 0x7f) {
    bytes[0] = (uint8_t)value;
    return 1;
  }

  // 2 byte character 110xxxxx 10xxxxxx -> last 6 bits write to 2nd byte and
  // first 5 bits write to first byte.
  if (value <= 0x7ff) {
    bytes[0] = (uint8_t)(0xC0 | (value >> 6));
    bytes[1] = (uint8_t)(0x80 | (value & 0x3F));
    return 2;
  }

  // 3 byte character 1110xxxx 10xxxxxx 10xxxxxx -> from last, 6 bits write
  // to 3rd byte, next 6 bits write to 2nd byte, and 4 bits to first byte.
  if (value <= 0xffff) {
    bytes[0] = (uint8_t)(0xE0 | (value >> 12));
    bytes[1] = (uint8_t)(0x80 | ((value >> 6) & 0x3F));
    bytes[2] = (uint8_t)(0x80 | (value & 0x3F));
    return 3;
  }

  // 4 byte character 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> last 6 bits to
  // 4th byte, next 6 bits to 3rd byte, next 6 bits to 2nd byte, 3 bits to
  // first byte.
  if (value <= 0x10ffff) {
    bytes[0] = (uint8_t)(0xF0 | (value >> 18));
    bytes[1] = (uint8_t)(0x80 | ((value >> 12) & 0x3F));
    bytes[2] = (uint8_t)(0x80 | ((value >> 6) & 0x3F));
    bytes[3] = (uint8_t)(0x80 | (value & 0x3F));
    return 4;
  }

  return 0;
}
|
||||
|
||||
// Decodes the utf8 character starting at [bytes] and writes its code point
// to [value]. Returns the number of bytes consumed. If the sequence is
// invalid (bad leading byte or bad continuation byte) returns -1 and writes
// -1 to [value].
int utf8_decodeBytes(uint8_t* bytes, int* value) {

  int continue_bytes = 0;
  int byte_count = 1;
  int _value = 0;
  const uint8_t lead = *bytes;

  if ((lead & 0x80) == 0x00) {
    // 0xxxxxxx: a single byte (ASCII) character. This branch used to test
    // for a continuation byte instead, which made every ASCII character
    // fail to decode.
    *value = lead;
    return byte_count;
  }

  else if ((lead & 0xE0) == 0xC0) {  // 110xxxxx
    continue_bytes = 1;
    _value = (lead & 0x1F);
  }

  else if ((lead & 0xF0) == 0xE0) {  // 1110xxxx
    continue_bytes = 2;
    _value = (lead & 0x0F);
  }

  else if ((lead & 0xF8) == 0xF0) {  // 11110xxx
    continue_bytes = 3;
    _value = (lead & 0x07);
  }

  else {
    // Invalid leading byte (including a stray continuation byte).
    *value = -1;
    return -1;
  }

  // Now add the continuation bytes (10xxxxxx) to the _value.
  while (continue_bytes--) {
    bytes++, byte_count++;

    if ((*bytes & 0xC0) != 0x80) {
      *value = -1;
      return -1;
    }

    _value = (_value << 6) | (*bytes & 0x3F);
  }

  *value = _value;
  return byte_count;
}
|
||||
|
||||
#undef B1
|
||||
#undef B2
|
||||
#undef B3
|
||||
#undef B4
|
112
src/utils.h
Normal file
112
src/utils.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2020-2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// Returns the smallest power of two that is equal to or greater than [n].
|
||||
// Copied from : https://github.com/wren-lang/wren/blob/main/src/vm/wren_utils.h#L119
|
||||
// Reference : http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2Float
|
||||
int utilPowerOf2Ceil(int n);
|
||||
|
||||
// Returns true if `c` is [A-Za-z_].
|
||||
bool utilIsName(char c);
|
||||
|
||||
// Returns true if `c` is [0-9].
|
||||
bool utilIsDigit(char c);
|
||||
|
||||
#endif // UTILS_H
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* UTF8 *
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
#ifndef UTF8_H
|
||||
#define UTF8_H
|
||||
|
||||
/** @file
|
||||
* A tiny UTF-8 utility library.
|
||||
*
|
||||
*
|
||||
 * Utf-8 is an elegant character encoding; I just love its simplicity and
 * compatibility. It's a wonderful hack. A single byte utf-8 character is the
 * same as an ASCII character. In case you don't know about ASCII encoding,
 * it's just how a character is represented in a single byte. For example the
 * character 'A' is 01000001, 'B' is 01000010 and so on. The first bit is
 * always 0. Back in the old age of computers that spare bit was often used
 * as a parity bit (the sum of the rest of the bits mod 2), a way to check if
 * some of the bits had been flipped by noise. So we have 7 bits to represent
 * ASCII, which is 128 different characters. But utf-8 can potentially encode
 * 2,164,864 characters.
|
||||
*
|
||||
 * The length of a utf-8 character varies from 1 to 4 bytes. If it's a single
 * byte character, it starts with a 0 and the remaining 7 bits hold the
 * value. It's not just like ASCII, it is ASCII (compatible). For a 2 byte
 * character the first byte starts with 110....., for a 3 byte character
 * it starts with 1110.... and for a 4 byte character with 11110... The first
 * byte is called the leading byte and the rest of the bytes of the character
 * are called continuation bytes.
|
||||
*
|
||||
* <pre>
|
||||
* example:
|
||||
* v-- leading byte v-- continuation byte => 2 bytes
|
||||
* é = 11000011 10101001
|
||||
* ^^^ ^^
|
||||
* 110 means 2 bytes 10 means continuation
|
||||
*
|
||||
* (note that the character é is 8 bit long with ANSI encoding)
|
||||
* </pre>
|
||||
*
|
||||
* USAGE:
|
||||
* // define imlpementation only a single *.c source file like this
|
||||
* #define UTF8_IMPLEMENT
|
||||
* #include "utf8.h"
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Returns the number of bytes the the [value] would take to encode. returns 0
|
||||
// if the value is invalid utf8 representation.
|
||||
//
|
||||
// <pre>
|
||||
// For single byte character, represented as 0xxxxxxx
|
||||
// the payload is 7 bits so the maximum value would be 0x7f
|
||||
//
|
||||
// For 2 bytes characters, represented as 110xxxxx 10xxxxxx
|
||||
// the payload is 11 bits | xxx xxxx xxxx |
|
||||
// so the maximum value would be 0x7ff | 7 f f |
|
||||
//
|
||||
// For 3 bytes character, represented as 1110xxxx 10xxxxxx 10xxxxxx
|
||||
// the payload is 16 bits | xxxx xxxx xxxx xxxx |
|
||||
// so the maximum value would be 0xffff | f f f f |
|
||||
//
|
||||
// For 4 bytes character, represented as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
// the payload is 21 bits | x xxxx xxxx xxxx xxxx xxxx |
|
||||
// so the maximum value *SHOULD* be 0x1fffff | 1 f f f f f |
|
||||
// but in RFC3629 §3 (https://tools.ietf.org/html/rfc3629#section-3) UTF-8 is
|
||||
// limited to 0x10FFFF to match the limits of UTF-16.
|
||||
// </pre>
|
||||
int utf8_encodeBytesCount(int value);
|
||||
|
||||
// Returns the number of bytes the leading [byte] contains. Returns 1 if
|
||||
// the byte is an invalid utf8 leading byte (to skip past to the next byte).
|
||||
int utf8_decodeBytesCount(uint8_t byte);
|
||||
|
||||
// Encodes the 32 bit value into a byte array which should be a size of 4 and
|
||||
// returns the number of bytes the value encoded (if invalid returns 0, that
|
||||
// is how many bytes it wrote to the buffer).
|
||||
int utf8_encodeValue(int value, uint8_t* bytes);
|
||||
|
||||
// Decodes from the leading byte of [bytes], writes the value to param [value] and
|
||||
// returns the number of bytes the value decoded, if invalid write -1 to the
|
||||
// value.
|
||||
int utf8_decodeBytes(uint8_t* bytes, int* value);
|
||||
|
||||
|
||||
#endif // UTF8_H
|
96
src/var.c
Normal file
96
src/var.c
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2020-2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include "var.h"
|
||||
#include "vm.h"
|
||||
|
||||
// Initialize the common object header of [self] with [type] and register the
// object with [vm] by prepending it to the VM's linked list of heap objects.
void varInitObject(Object* self, VM* vm, ObjectType type) {
  // Link in front of the current list head, then become the new head.
  self->next = vm->first;
  vm->first = self;

  self->type = type;
  // TODO: set isGray = false;
}
|
||||
|
||||
#if VAR_NAN_TAGGING
|
||||
// A union to reinterpret a double as raw bits and back.
|
||||
typedef union {
|
||||
uint64_t bits64;
|
||||
uint32_t bits32[2];
|
||||
double num;
|
||||
} _DoubleBitsConv;
|
||||
#endif
|
||||
|
||||
Var doubleToVar(double value) {
|
||||
#if VAR_NAN_TAGGING
|
||||
_DoubleBitsConv bits;
|
||||
bits.num = value;
|
||||
return bits.bits64;
|
||||
#else
|
||||
// TODO:
|
||||
#endif // VAR_NAN_TAGGING
|
||||
}
|
||||
|
||||
// Reinterprets the raw 64 bits of [value] as an IEEE 754 double (prefer the
// AS_NUM(value) macro).
//
// The function has external linkage on purpose: var.h declares it as
// `double varToDouble(Var value);` and the AS_NUM() macro expands to a call
// of it in every translation unit that includes the header. A `static`
// definition here would conflict with that declaration and leave other
// translation units without a symbol to link against.
double varToDouble(Var value) {
#if VAR_NAN_TAGGING
  _DoubleBitsConv bits;
  bits.bits64 = value;
  return bits.num;
#else
  // TODO:
#endif // VAR_NAN_TAGGING
}
|
||||
|
||||
// Allocates a new String object owned by [vm] holding a copy of the first
// [length] bytes of [text] followed by a terminating '\0', and returns it.
// [text] may be NULL only when [length] is 0.
String* newString(VM* vm, const char* text, uint32_t length) {

  ASSERT(length == 0 || text != NULL, "Unexpected NULL string.");

  // One extra byte of tail storage for the null terminator.
  String* string = ALLOCATE_DYNAMIC(vm, String, length + 1, char);
  varInitObject(&string->_super, vm, OBJ_STRING);
  string->length = length;
  // Record the size of the tail array so the field is never read
  // uninitialized.
  string->capacity = length + 1;

  if (length != 0) memcpy(string->data, text, length);
  string->data[length] = '\0';
  return string;
}
|
||||
|
||||
// Allocates a new, empty Script object owned by [vm] and returns it. The
// script starts with no imports, no globals and no functions.
Script* newScript(VM* vm) {
  Script* script = ALLOCATE(vm, Script);
  varInitObject(&script->_super, vm, OBJ_SCRIPT);

  // vmRealloc() hands out memory from realloc(), which is not zeroed, so the
  // import counter has to be reset explicitly (assuming ALLOCATE forwards to
  // vmRealloc -- TODO confirm).
  script->import_count = 0;

  varBufferInit(&script->globals);
  nameTableInit(&script->global_names);

  functionBufferInit(&script->functions);
  nameTableInit(&script->function_names);

  return script;
}
|
||||
|
||||
// Allocates a new Function object owned by [vm] and returns it. [name] and
// [owner] are stored as given and the arity starts out as -1. A native
// function gets a NULL native pointer; a script function gets a freshly
// allocated, empty Fn body.
Function* newFunction(VM* vm, const char* name, Script* owner,
                      bool is_native) {

  Function* function = ALLOCATE(vm, Function);
  varInitObject(&function->_super, vm, OBJ_FUNC);

  function->name = name;
  function->owner = owner;
  function->arity = -1;
  function->is_native = is_native;

  if (is_native) {
    function->native = NULL;
    return function;
  }

  // Keep the function on the temp reference stack while its body is being
  // allocated.
  vmPushTempRef(vm, &function->_super);
  Fn* body = ALLOCATE(vm, Fn);
  vmPopTempRef(vm);

  byteBufferInit(&body->opcodes);
  intBufferInit(&body->oplines);
  body->stack_size = 0;
  function->fn = body;

  return function;
}
|
295
src/var.h
Normal file
295
src/var.h
Normal file
@ -0,0 +1,295 @@
|
||||
/*
|
||||
* Copyright (c) 2020-2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef VAR_H
|
||||
#define VAR_H
|
||||
|
||||
/** @file
|
||||
* A simple single header dynamic type system library for small dynamic typed
|
||||
* languages using a technique called NaN-tagging (optional). The method is
|
||||
* inspired from the wren (https://wren.io/) an awesome language written by the
|
||||
* author of "Crafting Interpreters" Bob Nystrom and its contributors.
|
||||
* Reference:
|
||||
* https://github.com/wren-lang/wren/blob/main/src/vm/wren_value.h
|
||||
* https://leonardschuetz.ch/blog/nan-boxing/
|
||||
*
|
||||
* The previous implementation was to add a type field to every \ref var
|
||||
* and use smart pointers(C++17) to object with custom destructors,
|
||||
* which makes the program inefficient for small types such as null, bool,
|
||||
* int and float.
|
||||
*/
|
||||
|
||||
/** __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS are a workaround to
|
||||
* allow C++ programs to use stdint.h macros specified in the C99
|
||||
* standard that aren't in the C++ standard */
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#include <stdint.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "types/gen/byte_buffer.h"
|
||||
#include "types/gen/function_buffer.h"
|
||||
#include "types/gen/int_buffer.h"
|
||||
#include "types/gen/var_buffer.h"
|
||||
#include "types/name_table.h"
|
||||
|
||||
// To use dynamic variably-sized struct with a tail array add an array at the
|
||||
// end of the struct with size \ref DYNAMIC_TAIL_ARRAY. This method was a
|
||||
// legacy standard called "struct hack".
|
||||
#if __STDC_VERSION__ >= 199901L
|
||||
/** for std >= c99 it's just `arr[]` */
|
||||
#define DYNAMIC_TAIL_ARRAY
|
||||
#else
|
||||
#define DYNAMIC_TAIL_ARRAY 0
|
||||
#endif
|
||||
|
||||
// Number of maximum import statements in a script.
|
||||
#define MAX_IMPORT_SCRIPTS 16
|
||||
|
||||
/**
|
||||
* The IEEE 754 double precision float bit representation.
|
||||
*
|
||||
* 1 Sign bit
|
||||
* | 11 Exponent bits
|
||||
* | | 52 Mantissa (i.e. fraction values) bits
|
||||
* | | |
|
||||
* S[Exponent-][Mantissa------------------------------------------]
|
||||
*
|
||||
* if all bits of the exponent are set it's a NaN ("Not a Number") value.
|
||||
*
|
||||
* v~~~~~~~~~~ NaN value
|
||||
* -11111111111----------------------------------------------------
|
||||
*
|
||||
* We define a our variant \ref var as an unsigned 64 bit integer (we treat it
|
||||
* like a bit array) if the exponent bits were not set, just reinterpret it as
|
||||
* an IEEE 754 double precision 64 bit number. Otherwise there are a lot of
|
||||
* different combination of bits we can use for our custom tagging, this method
|
||||
* is called NaN-Tagging.
|
||||
*
|
||||
* There are two kinds of NaN values "signalling" and "quiet". The first one is
|
||||
* intended to halt the execution but the second one is to continue the
|
||||
* execution quietly. We get the quiet NaN by setting the highest mantissa bit.
|
||||
*
|
||||
* v~Highest mestissa bit
|
||||
* -[NaN ]1---------------------------------------------------
|
||||
*
|
||||
* if sign bit set, it's a heap allocated pointer.
|
||||
* | these 2 bits are type tags representing 8 different types
|
||||
* | vv
|
||||
* S[NaN ]1cXX------------------------------------------------
|
||||
* | ^~~~~~~~ 48 bits to represent the value (51 for pointer)
|
||||
* '- if this (const) bit set, it's a constant.
|
||||
*
|
||||
* On a 32-bit machine a pointer size is 32 and on a 64-bit machine actually 48
|
||||
* bits are used for pointers. Ta-da, now we have double precision number,
|
||||
* primitives, pointers all inside a 64 bit sequence and for numbers it doesn't
|
||||
* require any bit mask operations, which means math on the var is now even
|
||||
* faster.
|
||||
*
|
||||
* our custom 2 bits type tagging
|
||||
* c00 : NULL
|
||||
* c01 ... 0 : UNDEF (used in unused map keys)
|
||||
* ... 1 : VOID (void function return void not null)
|
||||
* ... 10 : FALSE
|
||||
* ... 11 : TRUE
|
||||
* c10 : INTEGER
|
||||
* |
|
||||
* '-- c is const bit.
|
||||
*
|
||||
*/
|
||||
|
||||
#if VAR_NAN_TAGGING
|
||||
|
||||
// Masks and payloads.
|
||||
#define _MASK_SIGN ((uint64_t)0x8000000000000000)
|
||||
#define _MASK_QNAN ((uint64_t)0x7ffc000000000000)
|
||||
#define _MASK_TYPE ((uint64_t)0x0003000000000000)
|
||||
#define _MASK_CONST ((uint64_t)0x0004000000000000)
|
||||
|
||||
#define _MASK_INTEGER (_MASK_QNAN | (uint64_t)0x0002000000000000)
|
||||
#define _MASK_OBJECT (_MASK_QNAN | (uint64_t)0x8000000000000000)
|
||||
|
||||
#define _PAYLOAD_INTEGER ((uint64_t)0x00000000ffffffff)
|
||||
#define _PAYLOAD_OBJECT ((uint64_t)0x0000ffffffffffff)
|
||||
|
||||
// Primitive types.
|
||||
#define VAR_NULL (_MASK_QNAN | (uint64_t)0x0000000000000000)
|
||||
#define VAR_UNDEFINED (_MASK_QNAN | (uint64_t)0x0001000000000000)
|
||||
#define VAR_VOID (_MASK_QNAN | (uint64_t)0x0001000000000001)
|
||||
#define VAR_FALSE (_MASK_QNAN | (uint64_t)0x0001000000000002)
|
||||
#define VAR_TRUE (_MASK_QNAN | (uint64_t)0x0001000000000003)
|
||||
|
||||
// Encode types.
|
||||
#define VAR_BOOL(value) ((value)? VAR_TRUE : VAR_FALSE)
|
||||
#define VAR_INT(value) (_MASK_INTEGER | (uint32_t)(int32_t)(value))
|
||||
#define VAR_NUM(value) (doubleToVar(value))
|
||||
#define VAR_OBJ(value) ((Var)(_MASK_OBJECT | (uint64_t)(uintptr_t)(value)))
|
||||
|
||||
// Const casting.
|
||||
#define ADD_CONST(value) ((value) | _MASK_CONST)
|
||||
#define REMOVE_CONST(value) ((value) & ~_MASK_CONST)
|
||||
|
||||
// Check types. Every macro parameter is parenthesized so that compound
// argument expressions expand with the intended operator precedence.
#define IS_CONST(value) (((value) & _MASK_CONST) == _MASK_CONST)
#define IS_NULL(value)  ((value) == VAR_NULL)
// Compares against VAR_UNDEFINED, the name the constant is defined under.
#define IS_UNDEF(value) ((value) == VAR_UNDEFINED)
#define IS_FALSE(value) ((value) == VAR_FALSE)
#define IS_TRUE(value)  ((value) == VAR_TRUE)
#define IS_BOOL(value)  (IS_TRUE(value) || IS_FALSE(value))
#define IS_INT(value)   (((value) & _MASK_INTEGER) == _MASK_INTEGER)
#define IS_NUM(value)   (((value) & _MASK_QNAN) != _MASK_QNAN)
#define IS_OBJ(value)   (((value) & _MASK_OBJECT) == _MASK_OBJECT)

// Decode types.
#define AS_BOOL(value) ((value) == VAR_TRUE)
#define AS_INT(value)  ((int32_t)((value) & _PAYLOAD_INTEGER))
#define AS_NUM(value)  (varToDouble(value))
// Goes through uintptr_t (mirroring VAR_OBJ) so the integer-to-pointer
// conversion is well sized on targets whose pointers are narrower than 64 bit.
#define AS_OBJ(value)  ((Object*)(uintptr_t)((value) & _PAYLOAD_OBJECT))
|
||||
|
||||
#define AS_STRING(value) ((String*)AS_OBJ(value))
|
||||
#define AS_CSTRING(value) (AS_STRING(value)->data)
|
||||
#define AS_ARRAY(value) ((Array*)AS_OBJ(value))
|
||||
#define AS_MAP(value) ((Map*)AS_OBJ(value))
|
||||
#define AS_RANGE(value) ((Range*)AS_OBJ(value))
|
||||
|
||||
typedef uint64_t Var;
|
||||
|
||||
#else
|
||||
|
||||
// TODO: Union tagging implementation of all the above macros ignore macros
|
||||
// starts with an underscore.
|
||||
|
||||
|
||||
typedef enum {
|
||||
VAR_UNDEFINED, //< Internal type for exceptions.
|
||||
VAR_NULL, //< Null pointer type.
|
||||
VAR_BOOL, //< Yin and yang of software.
|
||||
VAR_INT, //< Only 32bit integers (to consistance with Nan-Tagging).
|
||||
VAR_FLOAT, //< Floats are stored as (64bit) double.
|
||||
|
||||
VAR_OBJECT, //< Base type for all \ref var_Object types.
|
||||
} VarType;
|
||||
|
||||
typedef struct {
|
||||
VarType type;
|
||||
union {
|
||||
bool _bool;
|
||||
int _int;
|
||||
double _float;
|
||||
Object* _obj;
|
||||
};
|
||||
} var;
|
||||
|
||||
#endif // VAR_NAN_TAGGING
|
||||
|
||||
typedef enum /* ObjectType */ {
|
||||
OBJ_STRING,
|
||||
OBJ_ARRAY,
|
||||
OBJ_MAP,
|
||||
OBJ_RANGE,
|
||||
|
||||
OBJ_SCRIPT,
|
||||
OBJ_FUNC,
|
||||
OBJ_INSTANCE,
|
||||
|
||||
OBJ_USER,
|
||||
} ObjectType;
|
||||
|
||||
// Base struct for all heap allocated objects.
|
||||
struct Object {
|
||||
ObjectType type; //< Type of the object in \ref var_Object_Type.
|
||||
//Class* is; //< The class the object IS. // No OOP in MS.
|
||||
|
||||
Object* next; //< Next object in the heap allocated link list.
|
||||
};
|
||||
|
||||
struct String {
|
||||
Object _super;
|
||||
|
||||
uint32_t length; //< Length of the string in \ref data.
|
||||
uint32_t capacity; //< Size of allocated \ref data.
|
||||
char data[DYNAMIC_TAIL_ARRAY];
|
||||
};
|
||||
|
||||
struct Array {
|
||||
Object _super;
|
||||
|
||||
VarBuffer elements; //< Elements of the array.
|
||||
};
|
||||
|
||||
// TODO: struct Map here.
|
||||
|
||||
struct Range {
|
||||
Object _super;
|
||||
|
||||
double from; //< Beggining of the range inclusive.
|
||||
double to; //< End of the range exclusive.
|
||||
};
|
||||
|
||||
struct Script {
|
||||
Object _super;
|
||||
|
||||
ID imports[MAX_IMPORT_SCRIPTS]; //< Imported script IDs.
|
||||
int import_count; //< Number of import in imports.
|
||||
|
||||
VarBuffer globals; //< Script level global variables.
|
||||
NameTable global_names; //< Name map to index in globals.
|
||||
|
||||
FunctionBuffer functions; //< Script level functions.
|
||||
NameTable function_names; //< Name map to index in functions.
|
||||
|
||||
// TODO: literal constants as Map.
|
||||
};
|
||||
|
||||
// To maintain simplicity I won't implement object orientation in MiniScript.
|
||||
//struct Class {
|
||||
// Object _super;
|
||||
//
|
||||
// Class* _base_class;
|
||||
// String* name;
|
||||
//};
|
||||
|
||||
// Script function pointer.
|
||||
typedef struct {
|
||||
ByteBuffer opcodes; //< Buffer of opcodes.
|
||||
IntBuffer oplines; //< Line number of opcodes for debug (1 based).
|
||||
int stack_size; //< Maximum size of stack required.
|
||||
} Fn;
|
||||
|
||||
struct Function {
|
||||
Object _super;
|
||||
|
||||
const char* name; //< Name in the script [owner].
|
||||
Script* owner; //< Owner script of the function.
|
||||
int arity; //< Number of argument the function expects.
|
||||
|
||||
bool is_native; //< True if Native function.
|
||||
union {
|
||||
MiniScriptNativeFn native; //< Native function pointer.
|
||||
Fn* fn; //< Script function pointer.
|
||||
};
|
||||
};
|
||||
|
||||
// Methods.
|
||||
|
||||
void varInitObject(Object* self, VM* vm, ObjectType type);
|
||||
|
||||
// Instead use VAR_NUM(value) and AS_NUM(value)
|
||||
Var doubleToVar(double value);
|
||||
double varToDouble(Var value);
|
||||
|
||||
// Allocate new String object and return String*.
|
||||
String* newString(VM* vm, const char* text, uint32_t length);
|
||||
|
||||
// Allocate new Script object and return Script*.
|
||||
Script* newScript(VM* vm);
|
||||
|
||||
// Allocate new Function object and return Function*. Parameter [name] should
|
||||
// be the name in the Script's nametable.
|
||||
Function* newFunction(VM* vm, const char* name, Script* owner, bool is_native);
|
||||
|
||||
|
||||
#endif // VAR_H
|
34
src/vm.c
Normal file
34
src/vm.c
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include "vm.h"
|
||||
|
||||
// Allocation entry point of the VM: resizes [memory] from [old_size] to
// [new_size] bytes. A [new_size] of 0 frees the block and yields NULL,
// anything else is forwarded to realloc().
void* vmRealloc(VM* self, void* memory, size_t old_size, size_t new_size) {

  // Keep a running total of the VM's allocations; it is meant to trigger the
  // garbage collector later on.
  self->bytes_allocated += new_size - old_size;

  // TODO: If vm->bytes_allocated > some_value -> GC();

  if (new_size != 0) return realloc(memory, new_size);

  free(memory);
  return NULL;
}
|
||||
|
||||
// Pushes [obj] onto the VM's stack of temporary references. [obj] must not be
// NULL and the stack must hold fewer than MAX_TEMP_REFERENCE entries.
void vmPushTempRef(VM* self, Object* obj) {
  ASSERT(obj != NULL, "Cannot reference to NULL.");
  // Guard the fixed-size array: without this check a push beyond
  // MAX_TEMP_REFERENCE entries would write past the end of temp_reference.
  ASSERT(self->temp_reference_count < MAX_TEMP_REFERENCE,
         "Too many temp references");
  self->temp_reference[self->temp_reference_count++] = obj;
}
|
||||
|
||||
// Drops the most recently pushed temporary reference. The stack must not be
// empty.
void vmPopTempRef(VM* self) {
  ASSERT(self->temp_reference_count > 0, "Temporary reference is empty to pop.");
  // Only the counter moves; the stale slot is overwritten by the next push.
  self->temp_reference_count -= 1;
}
|
||||
|
48
src/vm.h
Normal file
48
src/vm.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef VM_H
|
||||
#define VM_H
|
||||
|
||||
#include "common.h"
|
||||
#include "compiler.h"
|
||||
#include "var.h"
|
||||
|
||||
// The maximum number of temporary object reference to protect them from being
|
||||
// garbage collected.
|
||||
#define MAX_TEMP_REFERENCE 8
|
||||
|
||||
struct VM {
|
||||
|
||||
// The first object in the link list of all heap allocated objects.
|
||||
Object* first;
|
||||
|
||||
size_t bytes_allocated;
|
||||
|
||||
// A stack of temporary object references to ensure that the object
|
||||
// isn't garbage collected.
|
||||
Object* temp_reference[MAX_TEMP_REFERENCE];
|
||||
int temp_reference_count;
|
||||
|
||||
// current compiler reference to mark it's heap allocated objects.
|
||||
Compiler* compiler;
|
||||
};
|
||||
|
||||
// A realloc wrapper which handles memory allocations of the VM.
|
||||
// - To allocate new memory pass NULL to parameter [memory] and 0 to
|
||||
// parameter [old_size] on failure it'll return NULL.
|
||||
// - To free an already allocated memory pass 0 to parameter [new_size]
|
||||
// and it'll returns NULL.
|
||||
// - The [old_size] parameter is required to keep track of the VM's
|
||||
// allocations to trigger the garbage collections.
|
||||
void* vmRealloc(VM* self, void* memory, size_t old_size, size_t new_size);
|
||||
|
||||
// Push the object to temporary references stack.
|
||||
void vmPushTempRef(VM* self, Object* obj);
|
||||
|
||||
// Pop the top most object from temporary reference stack.
|
||||
void vmPopTempRef(VM* self);
|
||||
|
||||
#endif // VM_H
|
296
test/clogger.h
Normal file
296
test/clogger.h
Normal file
@ -0,0 +1,296 @@
|
||||
/*
|
||||
* Copyright (c) 2020-2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#ifndef clogger_H
|
||||
#define clogger_H
|
||||
|
||||
/** @file
|
||||
* Single header console color logger library
|
||||
*
|
||||
* USAGE:
|
||||
* // define the implementation in only a single *.c source file like this
|
||||
* #define CLOGGER_IMPLEMENT
|
||||
* #include "clogger.h"
|
||||
*
|
||||
* You should call `clogger_init();` before any of your calling logging calls.
|
||||
* You can define your own pallete with `clogger_ColorPalette` and apply it
|
||||
* from `clogger_setColorPalette(your_pallete)` function. There is a list of
|
||||
* public API functions declared. `clogger_iColor` is just a 8 bit unsigned
|
||||
* integer value which, first 4 bits represent background and last 4 bits
|
||||
* represent foreground. COL_FG | (COL_BG << 4). You can define your own
|
||||
* pallete. For examples see the implementation in `test.c`.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
/** supported max 16 different colors to maintain compatibility in windows */
|
||||
#define PALLETE_MAX_SIZE 16
|
||||
|
||||
#define CLOGGER_PROGRESS_BAR 30
|
||||
|
||||
/** color logger public API */
|
||||
typedef struct clogger_Color clogger_Color;
|
||||
typedef struct clogger_ColorPalette clogger_ColorPalette;
|
||||
typedef uint8_t clogger_iColor;
|
||||
|
||||
clogger_Color clogger_ColorRGB(uint8_t r, uint8_t g, uint8_t b);
|
||||
clogger_ColorPalette clogger_newPallete();
|
||||
|
||||
void clogger_setColorPalette(clogger_ColorPalette pallate);
|
||||
void clogger_init();
|
||||
|
||||
void clogger_logf(clogger_iColor color, bool _stderr, const char* fmt, ...);
|
||||
void clogger_logfVA(const char* fmt, va_list args, bool _stderr,
|
||||
clogger_iColor color);
|
||||
void clogger_log(const char* msg, clogger_iColor color, bool _stderr);
|
||||
|
||||
void clogger_logfSuccess(const char* fmt, ...);
|
||||
void clogger_logfWarning(const char* fmt, ...);
|
||||
void clogger_logfError(const char* fmt, ...);
|
||||
|
||||
void clogger_progress(const char* msg, int done, int total);
|
||||
|
||||
/** Define our own platform macro */
|
||||
#ifndef _PLATFORM_DEFINED_
|
||||
#define _PLATFORM_DEFINED_
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
|
||||
#define PLATFORM_WINDOWS
|
||||
#elif defined(__APPLE__) || defined(__MACH__)
|
||||
#include <TargetConditionals.h>
|
||||
#if TARGET_IPHONE_SIMULATOR
|
||||
#define PLATFORM_IOS_SIMULATOR
|
||||
#elif TARGET_OS_IPHONE
|
||||
#define PLATFORM_IOS
|
||||
#elif TARGET_OS_MAC
|
||||
#define PLATFORM_APPLE
|
||||
#else
|
||||
#error "Unknown Apple platform"
|
||||
#endif
|
||||
#elif defined(__linux__)
|
||||
#define PLATFORM_LINUX
|
||||
#elif defined(__unix__)
|
||||
#define PLATFORM_UNIX
|
||||
#elif defined(_POSIX_VERSION)
|
||||
#define PLATFORM_POSIX
|
||||
#else
|
||||
#error "Unknown Platform"
|
||||
#endif
|
||||
#endif // _PLATFORM_DEFINED_
|
||||
|
||||
/** The buffer size for vnsprintf(...) */
|
||||
#ifndef VSNPRINTF_BUFF_SIZE
|
||||
#define VSNPRINTF_BUFF_SIZE 8192
|
||||
#endif
|
||||
|
||||
/** The platform independant color values */
|
||||
struct clogger_Color {
|
||||
uint8_t r, g, b;
|
||||
};
|
||||
|
||||
typedef enum clogger_Colors clogger_Colors;
|
||||
enum clogger_Colors {
|
||||
CLOGGER_COL_BLACK = 0,
|
||||
CLOGGER_COL_WHITE = 7,
|
||||
CLOGGER_COL_GREEN = 2,
|
||||
CLOGGER_COL_YELLOW = 14,
|
||||
CLOGGER_COL_RED = 12,
|
||||
|
||||
CLOGGER_COL_CUSTOM_1 = 1,
|
||||
CLOGGER_COL_CUSTOM_2 = 3,
|
||||
CLOGGER_COL_CUSTOM_3 = 4,
|
||||
CLOGGER_COL_CUSTOM_4 = 5,
|
||||
CLOGGER_COL_CUSTOM_5 = 6,
|
||||
CLOGGER_COL_CUSTOM_6 = 8,
|
||||
CLOGGER_COL_CUSTOM_7 = 9,
|
||||
CLOGGER_COL_CUSTOM_8 = 10,
|
||||
CLOGGER_COL_CUSTOM_9 = 11,
|
||||
CLOGGER_COL_CUSTOM_10 = 13,
|
||||
CLOGGER_COL_CUSTOM_11 = 15,
|
||||
};
|
||||
|
||||
struct clogger_ColorPalette {
|
||||
clogger_Color colors[PALLETE_MAX_SIZE];
|
||||
};
|
||||
|
||||
#endif // clogger_H
|
||||
|
||||
#ifdef CLOGGER_IMPLEMENT
|
||||
|
||||
/** The default color palette of cprint (global) */
|
||||
clogger_ColorPalette* g_clogger_color_pallete;
|
||||
|
||||
void clogger_init() {
|
||||
if (g_clogger_color_pallete == NULL) {
|
||||
clogger_setColorPalette(clogger_newPallete());
|
||||
}
|
||||
}
|
||||
|
||||
clogger_Color clogger_ColorRGB(uint8_t r, uint8_t g, uint8_t b) {
|
||||
clogger_Color ret = {r, g, b};
|
||||
return ret;
|
||||
}
|
||||
|
||||
// printf-style logging in [color]; writes to stderr when [_stderr] is true,
// to stdout otherwise. Thin variadic front end of clogger_logfVA().
void clogger_logf(clogger_iColor color, bool _stderr, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  clogger_logfVA(fmt, ap, _stderr, color);
  va_end(ap);
}
|
||||
|
||||
void clogger_logfSuccess(const char* p_fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, p_fmt);
|
||||
clogger_logfVA(p_fmt, args, false, CLOGGER_COL_GREEN);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void clogger_logfWarning(const char* p_fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, p_fmt);
|
||||
clogger_logfVA(p_fmt, args, true, CLOGGER_COL_YELLOW);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void clogger_logfError(const char* p_fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, p_fmt);
|
||||
clogger_logfVA(p_fmt, args, true, CLOGGER_COL_RED);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
// Formats [fmt] with [args] into a stack buffer of VSNPRINTF_BUFF_SIZE bytes
// and passes the result to clogger_log(). Nothing is logged when the
// formatted length is zero or negative; longer output is truncated.
void clogger_logfVA(const char* fmt, va_list args, bool _stderr, clogger_iColor color) {

  char text[VSNPRINTF_BUFF_SIZE + 1]; // +1 for the terminating character
  int written = vsnprintf(text, VSNPRINTF_BUFF_SIZE, fmt, args);

  if (written > 0) {
    // vsnprintf reports the untruncated length; clamp it to the buffer.
    int end = ((unsigned int)written < VSNPRINTF_BUFF_SIZE)
                ? written
                : VSNPRINTF_BUFF_SIZE;
    text[end] = 0;
    clogger_log((const char*)text, color, _stderr);
  }
}
|
||||
|
||||
/** for other terminal emulator which support ANSI (git base, mysys, putty, ...) */
|
||||
void cclogger_logANSI(const char* message, clogger_iColor color, bool _stderr) {
|
||||
// \033[38;2;R;G;Bm msg \033[0;00m
|
||||
assert(g_clogger_color_pallete != NULL && "did you forgot to call clogger_init()");
|
||||
|
||||
clogger_Color col = g_clogger_color_pallete->colors[color];
|
||||
|
||||
char buff_color[23]; int ptr = 0;
|
||||
ptr += sprintf(buff_color + ptr, "%s", "\033[38;2;");
|
||||
ptr += sprintf(buff_color + ptr, "%i", col.r);
|
||||
ptr += sprintf(buff_color + ptr, "%s", ";");
|
||||
ptr += sprintf(buff_color + ptr, "%i", col.g);
|
||||
ptr += sprintf(buff_color + ptr, "%s", ";");
|
||||
ptr += sprintf(buff_color + ptr, "%i", col.b);
|
||||
ptr += sprintf(buff_color + ptr, "%s", "m");
|
||||
buff_color[22] = '\0';
|
||||
fprintf((_stderr) ? stderr : stdout, "%s%s%s", buff_color, message, "\033[0;00m");
|
||||
}
|
||||
|
||||
void clogger_progress(const char* msg, int done, int total) {
|
||||
float precentage = (float)done / (float)total;
|
||||
clogger_logf(CLOGGER_COL_WHITE, false, "\r%s [", msg);
|
||||
int i = 0;
|
||||
for (; i < precentage * CLOGGER_PROGRESS_BAR; i++)
|
||||
clogger_log("#", CLOGGER_COL_GREEN, false);
|
||||
for (; i < CLOGGER_PROGRESS_BAR; i++)
|
||||
clogger_log(" ", CLOGGER_COL_WHITE, false);
|
||||
clogger_logf(CLOGGER_COL_WHITE, false, "] %i%%", (int)(precentage * 100));
|
||||
fflush(stdout);
|
||||
|
||||
}
|
||||
|
||||
/** PLATFORM DEPENDENT CODE **************************************************/
|
||||
|
||||
#ifdef PLATFORM_WINDOWS
|
||||
|
||||
#ifndef NOMINMAX /**< mingw already has defined for us. */
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <io.h> /**< for isatty() */
|
||||
#include <Windows.h>
|
||||
#undef ERROR /**< This will polute symbol `ERROR` later */
|
||||
|
||||
#include <direct.h>
|
||||
|
||||
clogger_ColorPalette clogger_newPallete() {
|
||||
clogger_ColorPalette pallete;
|
||||
#ifndef __TINYC__
|
||||
if (_isatty(_fileno(stdout))) {
|
||||
HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
CONSOLE_SCREEN_BUFFER_INFOEX info;
|
||||
info.cbSize = sizeof(CONSOLE_SCREEN_BUFFER_INFOEX);
|
||||
|
||||
GetConsoleScreenBufferInfoEx(hConsole, &info);
|
||||
for (int i = 1; i < PALLETE_MAX_SIZE; i++) {
|
||||
COLORREF color = info.ColorTable[i];
|
||||
pallete.colors[i].r = GetRValue(color);
|
||||
pallete.colors[i].g = GetGValue(color);
|
||||
pallete.colors[i].b = GetBValue(color);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return pallete;
|
||||
}
|
||||
|
||||
// Makes [pallete] the active palette (a copy is kept in static storage and
// published through g_clogger_color_pallete). When stdout is a console, the
// console's color table entries 1..PALLETE_MAX_SIZE-1 are overwritten with
// the palette as well; otherwise the colors are emitted as ANSI codes by the
// logging functions.
void clogger_setColorPalette(clogger_ColorPalette pallete) {
  static clogger_ColorPalette s_pallete;
  s_pallete = pallete;
  g_clogger_color_pallete = &s_pallete;
#ifndef __TINYC__
  if (_isatty(_fileno(stdout))) {
    HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
    CONSOLE_SCREEN_BUFFER_INFOEX info;
    info.cbSize = sizeof(CONSOLE_SCREEN_BUFFER_INFOEX);

    // If the current settings cannot be read, [info] holds garbage and must
    // not be written back to the console.
    if (!GetConsoleScreenBufferInfoEx(hConsole, &info)) return;

    for (int i = 1; i < PALLETE_MAX_SIZE; i++) {
      uint8_t r = g_clogger_color_pallete->colors[i].r;
      uint8_t g = g_clogger_color_pallete->colors[i].g;
      uint8_t b = g_clogger_color_pallete->colors[i].b;
      info.ColorTable[i] = RGB(r, g, b);
    }
    SetConsoleScreenBufferInfoEx(hConsole, &info);
  }
#endif
  // else we use ANSI color codes
}
|
||||
|
||||
// Sets the text attribute of the console behind STD_OUTPUT_HANDLE to [color].
static void _win32_setConsoleColor(clogger_iColor color) {
  assert(g_clogger_color_pallete != NULL && "did you forgot to call clogger_init()");
  HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
  SetConsoleTextAttribute(console, color);
}
|
||||
|
||||
// Writes [message] in [color] to stderr when [_stderr] is true, to stdout
// otherwise. A console stream is colored through the console text attribute
// (reset to CLOGGER_COL_WHITE afterwards); any other stream receives ANSI
// escape codes and is flushed.
void clogger_log(const char* message, clogger_iColor color, bool _stderr) {
  FILE* stream = (_stderr) ? stderr : stdout;

  if (!_isatty(_fileno(stream))) {
    cclogger_logANSI(message, color, _stderr);
    fflush(stream);
    return;
  }

  _win32_setConsoleColor(color);
  fprintf(stream, "%s", message);
  _win32_setConsoleColor(CLOGGER_COL_WHITE);
}
|
||||
|
||||
#elif defined(PLATFORM_LINUX)
|
||||
|
||||
#error "TODO:"
|
||||
|
||||
#else
|
||||
|
||||
#error "TODO:"
|
||||
|
||||
#endif // PLATFORM_WINDOWS
|
||||
|
||||
#endif // CLOGGER_IMPLEMENT
|
54
test/main.c
Normal file
54
test/main.c
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Thakee Nathees
|
||||
* Licensed under: MIT License
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define CLOGGER_IMPLEMENT
|
||||
#include "clogger.h"
|
||||
|
||||
// FIXME:
|
||||
#include "../src/common.h"
|
||||
#include "../src/var.h"
|
||||
#include "../src/vm.h"
|
||||
|
||||
#include "../src/types/gen/string_buffer.h"
|
||||
#include "../src/types/gen/byte_buffer.h"
|
||||
|
||||
int main() {
|
||||
clogger_init();
|
||||
//clogger_logfError("[DummyError] dummy error\n");
|
||||
//clogger_logfWarning("[DummyWarning] dummy warning\n");
|
||||
|
||||
FILE* fp = fopen("test.ms", "r");
|
||||
if (fp != NULL) {
|
||||
char buff[1024];
|
||||
size_t read = fread(buff, 1, sizeof(buff), fp);
|
||||
buff[read] = '\0';
|
||||
printf("%s\n", buff);
|
||||
fclose(fp);
|
||||
} else {
|
||||
clogger_logfError("[Error] cannot open file test.ms\n");
|
||||
}
|
||||
|
||||
VM* vm = (VM*)malloc(sizeof(VM));
|
||||
memset(vm, 0, sizeof(VM));
|
||||
|
||||
ByteBuffer buff;
|
||||
byteBufferInit(&buff);
|
||||
|
||||
byteBufferWrite(&buff, vm, 'a');
|
||||
byteBufferWrite(&buff, vm, 'b');
|
||||
byteBufferWrite(&buff, vm, 'c');
|
||||
|
||||
String* str = newString(vm, (const char*)buff.data, 3);
|
||||
Var vstr = VAR_OBJ(&str->_super);
|
||||
if (strcmp(AS_CSTRING(vstr), "abc") != 0) {
|
||||
clogger_logfError("[Error] something went wrong.\n");
|
||||
}
|
||||
|
||||
compileSource(vm, "native someNativeFn(a, b, c);\n");
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user