mirror of
synced 2025-02-06 20:50:55 +08:00
The name "script" is misleading, as it only refers to scripts (the files that contain the statements), whereas it could also be a native module object containing a collection of native functions. After this commit, native functions can also have an owner module set, so it won't be as confusing as before.
3063 lines
98 KiB
3063 lines
98 KiB
* Copyright (c) 2020-2022 Thakee Nathees
* Copyright (c) 2021-2022 Pocketlang Contributors
* Distributed Under The MIT License
#include "pk_compiler.h"
#include "pk_core.h"
#include "pk_buffers.h"
#include "pk_utils.h"
#include "pk_vm.h"
#include "pk_debug.h"
// The maximum number of locals or globals (if compiling top level module)
// to lookup from the compiling context. Also it's limited by its opcode
// which is using a single byte value to identify the local.
#define MAX_VARIABLES 256

// The maximum number of constant literals a module can contain. Also it's
// limited by its opcode which is using a short value to identify.
#define MAX_CONSTANTS (1 << 16)

// The maximum number of names that were used before defined. It's just the
// size of the Forward buffer of the compiler. Feel free to increase it if it
// requires more.
#define MAX_FORWARD_NAMES 256

// Pocketlang supports two types of interpolation.
//
//   1. Name interpolation       ex: "Hello $name!"
//   2. Expression interpolation ex: "Hello ${getName()}!"
//
// Consider a string: "a ${ b "c ${d}" } e" -- Here the depth of 'b' is 1 and
// the depth of 'd' is 2 and so on. The maximum depth an expression can go is
// defined as MAX_STR_INTERP_DEPTH below.
#define MAX_STR_INTERP_DEPTH 8

// The maximum address possible to jump. Similar limitation as MAX_CONSTANTS
// above.
#define MAX_JUMP (1 << 16)

// Max number of break statements in a loop statement to patch.
#define MAX_BREAK_PATCH 256
/* TOKENS */
// Every kind of token the lexer can produce. The lexer in this file emits
// all of these, so each one it references must be declared here.
typedef enum {

  TK_ERROR = 0,  // Lexing failed (an error has already been reported).
  TK_EOF,        // End of the source.
  TK_LINE,       // New line.

  // symbols
  TK_DOT,        // .
  TK_DOTDOT,     // ..
  TK_COMMA,      // ,
  TK_COLLON,     // :
  TK_SEMICOLLON, // ;
  TK_HASH,       // #
  TK_LPARAN,     // (
  TK_RPARAN,     // )
  TK_LBRACKET,   // [
  TK_RBRACKET,   // ]
  TK_LBRACE,     // {
  TK_RBRACE,     // }
  TK_PERCENT,    // %

  TK_TILD,       // ~
  TK_AMP,        // &
  TK_PIPE,       // |
  TK_CARET,      // ^
  TK_ARROW,      // ->

  TK_PLUS,       // +
  TK_MINUS,      // -
  TK_STAR,       // *
  TK_FSLASH,     // /
  TK_BSLASH,     // \.
  TK_EQ,         // =
  TK_GT,         // >
  TK_LT,         // <

  TK_EQEQ,       // ==
  TK_NOTEQ,      // !=
  TK_GTEQ,       // >=
  TK_LTEQ,       // <=

  TK_PLUSEQ,     // +=
  TK_MINUSEQ,    // -=
  TK_STAREQ,     // *=
  TK_DIVEQ,      // /=
  TK_MODEQ,      // %=

  TK_ANDEQ,      // &=
  TK_OREQ,       // |=
  TK_XOREQ,      // ^=

  TK_SRIGHT,     // >>
  TK_SLEFT,      // <<

  TK_SRIGHTEQ,   // >>=
  TK_SLEFTEQ,    // <<=

  // Keywords.
  TK_MODULE,     // module
  TK_CLASS,      // class
  TK_FROM,       // from
  TK_IMPORT,     // import
  TK_AS,         // as
  TK_DEF,        // def
  TK_NATIVE,     // native (C function declaration)
  TK_FUNC,       // func (literal function)
  TK_END,        // end

  TK_NULL,       // null
  TK_IN,         // in
  TK_AND,        // and
  TK_OR,         // or
  TK_NOT,        // not / !
  TK_TRUE,       // true
  TK_FALSE,      // false

  TK_DO,         // do
  TK_THEN,       // then
  TK_WHILE,      // while
  TK_FOR,        // for
  TK_IF,         // if
  TK_ELSIF,      // elsif
  TK_ELSE,       // else
  TK_BREAK,      // break
  TK_CONTINUE,   // continue
  TK_RETURN,     // return

  TK_NAME,       // identifier

  TK_NUMBER,     // number literal
  TK_STRING,     // string literal

  /* String interpolation
   *  "a ${b} c $d e"
   * tokenized as:
   *   TK_STRING_INTERP  "a "
   *   TK_NAME           b
   *   TK_STRING_INTERP  " c "
   *   TK_NAME           d
   *   TK_STRING         " e" */
  TK_STRING_INTERP,

} TokenType;
// A single lexed token: its type, where it sits in the source and, for
// literal tokens, the literal's value.
typedef struct {
TokenType type;
const char* start; //< Beginning of the token in the source.
int length; //< Number of chars of the token.
int line; //< Line number of the token (1 based).
Var value; //< Literal value of the token.
} Token;
// One entry of the keyword table: maps a keyword's spelling to its token.
typedef struct {
const char* identifier; //< The keyword text (NULL in the sentinel entry).
int length; //< Length of [identifier] in chars.
TokenType tk_type; //< Token type emitted for this keyword.
} _Keyword;
// List of keywords mapped into their identifiers. The table is terminated by
// a sentinel entry whose identifier is NULL; eatName() walks it until then.
static _Keyword _keywords[] = {
  { "module",   6, TK_MODULE   },
  { "class",    5, TK_CLASS    },
  { "from",     4, TK_FROM     },
  { "import",   6, TK_IMPORT   },
  { "as",       2, TK_AS       },
  { "def",      3, TK_DEF      },
  { "native",   6, TK_NATIVE   },
  { "func",     4, TK_FUNC     },
  { "end",      3, TK_END      },
  { "null",     4, TK_NULL     },
  { "in",       2, TK_IN       },
  { "and",      3, TK_AND      },
  { "or",       2, TK_OR       },
  { "not",      3, TK_NOT      },
  { "true",     4, TK_TRUE     },
  { "false",    5, TK_FALSE    },
  { "do",       2, TK_DO       },
  { "then",     4, TK_THEN     },
  { "while",    5, TK_WHILE    },
  { "for",      3, TK_FOR      },
  { "if",       2, TK_IF       },
  { "elsif",    5, TK_ELSIF    },
  { "else",     4, TK_ELSE     },
  { "break",    5, TK_BREAK    },
  { "continue", 8, TK_CONTINUE },
  { "return",   6, TK_RETURN   },

  { NULL,       0, (TokenType)(0) }, // Sentinel to mark the end of the array.
};
// Precedence parsing references:
// https://en.wikipedia.org/wiki/Shunting-yard_algorithm
// http://mathcenter.oxford.emory.edu/site/cs171/shuntingYardAlgorithm/
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
// Operator binding strength, from the loosest (top) to the tightest (bottom).
// The grammar rule table refers to every level listed here.
typedef enum {
  PREC_NONE,
  PREC_LOWEST,
  PREC_LOGICAL_OR,    // or
  PREC_LOGICAL_AND,   // and
  PREC_EQUALITY,      // == !=
  PREC_TEST,          // in is
  PREC_COMPARISION,   // < > <= >=
  PREC_BITWISE_OR,    // |
  PREC_BITWISE_XOR,   // ^
  PREC_BITWISE_AND,   // &
  PREC_BITWISE_SHIFT, // << >>
  PREC_RANGE,         // ..
  PREC_TERM,          // + -
  PREC_FACTOR,        // * / %
  PREC_UNARY,         // - ! ~ not
  PREC_CALL,          // ()
  PREC_SUBSCRIPT,     // []
  PREC_ATTRIB,        // .index
  PREC_PRIMARY,
} Precedence;
// Signature shared by all expression parsing callbacks (the expr*() functions).
typedef void (*GrammarFn)(Compiler* compiler);
// One entry of the grammar rule table, holding the parse callbacks of a token.
typedef struct {
GrammarFn prefix; //< Callback when the token starts an expression, or NULL.
GrammarFn infix; //< Callback when the token follows an operand, or NULL.
Precedence precedence; //< Precedence of the token as an infix operator.
} GrammarRule;
// Named scope depths. Values >= DEPTH_LOCAL are ordinary nested local scopes.
typedef enum {
DEPTH_MODULE = -2, //< Only used for module body function's depth.
DEPTH_GLOBAL = -1, //< Global variables.
DEPTH_LOCAL, //< Local scope. Increases with each inner scope.
} Depth;
// The kind of function being compiled (passed to compileFunction()).
typedef enum {
FN_NATIVE, //< Native C function.
FN_SCRIPT, //< Script functions defined with 'def'.
FN_LITERAL, //< Literal functions defined with the 'func' keyword.
} FuncType;
// A local variable tracked by the compiler.
typedef struct {
const char* name; //< Directly points into the source string.
uint32_t length; //< Length of the name.
int depth; //< The depth the local is defined in.
int line; //< The line the variable was declared on (for debugging).
} Local;
// Compile time context of the loop statement that is currently being compiled.
typedef struct sLoop {
// Index of the loop's start instruction where the execution will jump
// back to once it reaches the loop end or a continue is used.
int start;
// Index of the jump out address instruction to patch its value once done
// compiling the loop.
int exit_jump;
// Array of address indexes to patch break address.
int patches[MAX_BREAK_PATCH];
int patch_count;
// The outer loop of the current loop used to set and reset the compiler's
// current loop context.
struct sLoop* outer_loop;
// Depth of the loop, required to pop all the locals in that loop when it
// meets a break/continue statement inside.
int depth;
} Loop;
// ForwardName is used for globals that are accessed before being defined,
// inside a local scope.
// TODO: Since function and class global variables are initialized at
// compile time we can allow access to them at the global scope.
typedef struct sForwardName {
// Index of the short instruction that has the value of the global's name
// (in the names buffer of the module).
int instruction;
// The function where the name is used, and that the instruction belongs to.
Fn* func;
// The name string's pointer in the source.
const char* name;
int length;
// Line number of the name used (required for error message).
int line;
} ForwardName;
// Compile time context of the function that is currently being compiled.
typedef struct sFunc {
// Scope of the function. -2 for module body function, -1 for top level
// function and literal functions will have the scope where it is declared.
int depth;
// The actual function pointer which is being compiled.
Function* ptr;
// The index of the function in its module.
int index;
// The outer function of this function; for a top level function the outer
// function will be the module's body function.
struct sFunc* outer_func;
} Func;
// A convenient macro to get the [fn] of the function currently being compiled.
// Requires a variable named [compiler] in scope with a non-NULL [func].
#define _FN (compiler->func->ptr->fn)
// The context of the parsing phase for the compiler.
typedef struct sParser {
// Parser needs a reference to the PKVM to allocate strings (for string
// literals in the source) and to report errors if there are any.
PKVM* vm;
// The [source] and the [file_path] are pointers to an allocated string.
// The parser doesn't keep references to those objects (to prevent them
// from being garbage collected). It's the compiler's responsibility to keep
// the strings alive as long as the parser is alive.
const char* source; //< Currently compiled source.
const char* file_path; //< Path of the module (for reporting errors).
const char* token_start; //< Start of the currently parsed token.
const char* current_char; //< Current char position in the source.
int current_line; //< Line number of the current char.
Token previous, current, next; //< Currently parsed tokens.
// The current depth of the string interpolation. 0 means we're not inside
// an interpolated string.
int si_depth;
// If we're parsing an interpolated string and found a TK_RBRACE (ie. '}')
// we need to know if that belongs to the expression we're parsing, or is the
// end of the current interpolation.
// To achieve that we need to keep track of the number of open braces at the
// current depth. If we don't have any open brace then the TK_RBRACE token
// is consumed to end the interpolation.
// If we're inside an interpolated string (ie. si_depth > 0)
// si_open_brace[si_depth - 1] will return the number of open braces at the
// current depth.
int si_open_brace[MAX_STR_INTERP_DEPTH];
// Since we're supporting both quotes (single and double), we need to keep
// track of the quote the interpolation is surrounded by to properly
// terminate the string.
// Here si_quote[si_depth - 1] will return the surrounding quote of the
// expression at the current depth.
char si_quote[MAX_STR_INTERP_DEPTH];
// When we're parsing a name interpolated string (ie. "Hello $name!") we
// have to keep track of where the name ends to start the interpolation
// from there. The below value [si_name_end] will be NULL if we're not
// parsing a name interpolated string, otherwise it'll point to the end of
// the name.
// Also we're using [si_name_quote] to store the quote of the string to
// properly terminate.
const char* si_name_end;
char si_name_quote;
// An array of implicitly forward declared names, which will be resolved once
// the module is completely compiled.
ForwardName forwards[MAX_FORWARD_NAMES];
int forwards_count;
bool repl_mode; //< True if compiling for REPL.
bool has_errors; //< True if any syntax error occurred.
bool need_more_lines; //< True if we need more lines in REPL mode.
} Parser;
// The state of one module compilation: the parser plus everything needed to
// generate code for the current module, function, loop and scope.
struct Compiler {

  // The parser of the compiler which contains all the parsing context for the
  // current compilation.
  Parser parser;

  // Each module will be compiled with its own compiler and when a module is
  // imported, a new compiler is created for that module and it'll be added to
  // the linked list of compilers at the beginning. PKVM will use this compiler
  // reference as a root object (objects which won't be garbage collected) and
  // the chain of compilers will be marked at the marking phase.
  //
  // Here is how the chain changes when a new compiler (compiler_3) is created.
  //
  //     PKVM -> compiler_2 -> compiler_1 -> NULL
  //     PKVM -> compiler_3 -> compiler_2 -> compiler_1 -> NULL
  //
  Compiler* next_compiler;

  const PkCompileOptions* options; //< To configure the compilation.

  Module* module;  //< Current module that's being compiled.
  Loop* loop;      //< Current loop that we're parsing.
  Func* func;      //< Current function we're parsing.

  // Current depth the compiler is in (-1 means top level) 0 means function
  // level and > 0 is inner scope.
  int scope_depth;

  Local locals[MAX_VARIABLES]; //< Variables in the current context.
  int local_count;             //< Number of locals in [locals].

  int stack_size;  //< Current size including locals and temps.

  // True if the last statement is a new local variable assignment. Because
  // the assignment is different than a regular assignment, this boolean is
  // used to tell the compiler not to pop its assigned value because the value
  // itself is the local.
  bool new_local;

  // Will be true when parsing an "l-value" which can be assigned to a value
  // using the assignment operator ('='). ie. 'a = 42' here a is an "l-value"
  // and the 42 is a "r-value" so the assignment is consumed and compiled.
  // Consider '42 = a' where 42 is a "r-value" which cannot be assigned.
  // Similarly 'a = 1 + b = 2' the expression '(1 + b)' is a "r value" and
  // the assignment here is invalid, however 'a = 1 + (b = 2)' is valid because
  // the 'b' is an "l-value" and can be assigned but the '(b = 2)' is a
  // "r-value".
  bool l_value;

  // This value will be true after parsing a call expression, for every other
  // expression it'll be false. This is **ONLY** to be used when compiling a
  // return statement to check if the last parsed expression is a call to
  // perform a tail call optimization (anywhere else this below boolean is
  // meaningless).
  bool is_last_call;

  // Since the compiler manually calls some builtin functions we need to cache
  // the index of the functions in order to prevent searching for them each
  // time.
  int bifn_list_join;
};
// Per-opcode metadata generated from "pk_opcodes.h".
// NOTE(review): [params] and [stack] are whatever the OPCODE(name, params,
// stack) entries in pk_opcodes.h supply -- presumably the operand size and
// the stack size change of the instruction; confirm against that header.
typedef struct {
  int params;
  int stack;
} OpInfo;

// One entry per opcode, in the order pk_opcodes.h lists them.
static OpInfo opcode_info[] = {
  #define OPCODE(name, params, stack) { params, stack },
  #include "pk_opcodes.h"
  #undef OPCODE
};
// Initialize [parser] to start lexing [source] from its beginning.
//
// This should be called once the compiler is initialized (it reads
// compiler->options to determine whether we're compiling for the REPL).
// [path] is only used when reporting errors.
static void parserInit(Parser* parser, PKVM* vm, Compiler* compiler,
                       const char* source, const char* path) {

  parser->vm = vm;

  parser->source = source;
  parser->file_path = path;
  parser->token_start = parser->source;
  parser->current_char = parser->source;
  parser->current_line = 1;

  // lexToken() shifts next -> current -> previous and parseError() reads
  // previous.type, so all three token types must start in a defined state.
  parser->previous.type = TK_ERROR;
  parser->current.type = TK_ERROR;
  parser->next.type = TK_ERROR;

  parser->next.start = NULL;
  parser->next.length = 0;
  parser->next.line = 1;
  parser->next.value = VAR_UNDEFINED;

  parser->si_depth = 0;
  parser->si_name_end = NULL;
  parser->si_name_quote = '\0';

  parser->forwards_count = 0;

  parser->repl_mode = !!(compiler->options && compiler->options->repl_mode);
  parser->has_errors = false;
  parser->need_more_lines = false;
}
// Initialize [compiler] to compile [source] as the body of [module].
// [options] may be NULL (parserInit() checks for that).
static void compilerInit(Compiler* compiler, PKVM* vm, const char* source,
                         Module* module, const PkCompileOptions* options) {

  compiler->next_compiler = NULL;

  compiler->module = module;
  compiler->options = options;

  compiler->scope_depth = DEPTH_GLOBAL;
  compiler->local_count = 0;
  compiler->stack_size = 0;

  compiler->loop = NULL;
  compiler->func = NULL;

  compiler->new_local = false;
  compiler->l_value = false;
  compiler->is_last_call = false;

  // The parser reads compiler->options, so this must come after it is set.
  parserInit(&compiler->parser, vm, compiler, source, module->path->data);

  // Cache the required builtin functions.
  compiler->bifn_list_join = findBuiltinFunction(vm, "list_join", 9);
  ASSERT(compiler->bifn_list_join >= 0, OOPS);
}
// Internal error report function for lexing and parsing. Marks the parser as
// having errors and forwards the formatted message to the VM's error_fn
// callback (if any). [file] and [line] locate the error in the source.
static void reportError(Parser* parser, const char* file, int line,
                        const char* fmt, va_list args) {

  // On REPL mode only the first error is reported.
  if (parser->repl_mode && parser->has_errors) {
    return;
  }

  parser->has_errors = true;

  // If the source is incomplete we're not printing an error message,
  // instead return PK_RESULT_UNEXPECTED_EOF to the host.
  if (parser->need_more_lines) {
    ASSERT(parser->repl_mode, OOPS);
    return;
  }

  if (parser->vm->config.error_fn == NULL) return;

  // vsnprintf() never writes past sizeof(message); a message longer than
  // the buffer is truncated.
  char message[ERROR_MESSAGE_SIZE];
  int length = vsnprintf(message, sizeof(message), fmt, args);
  __ASSERT(length >= 0, "Error message buffer failed at vsnprintf().");

  parser->vm->config.error_fn(parser->vm, PK_ERROR_COMPILE,
                              file, line, message);
}
// Error caused in the middle of lexing (and TK_ERROR will be lexed instead).
// The error is reported at the parser's current line. [fmt] is a printf style
// format string.
static void lexError(Parser* parser, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  reportError(parser, parser->file_path, parser->current_line, fmt, args);
  va_end(args); // Every va_start() must be paired with a va_end().
}
// Error caused when parsing. The associated token is assumed to be the last
// consumed one, which is [parser->previous]. [fmt] is a printf style format
// string.
static void parseError(Compiler* compiler, const char* fmt, ...) {

  Token* token = &(compiler->parser.previous);

  // Lex errors would have been reported earlier by lexError, which lexed a
  // TK_ERROR token.
  if (token->type == TK_ERROR) return;

  va_list args;
  va_start(args, fmt);
  reportError(&(compiler->parser), compiler->parser.file_path,
              token->line, fmt, args);
  va_end(args); // Every va_start() must be paired with a va_end().
}
// Error caused when trying to resolve forward names (maybe more in the
// future), which will be called once after compiling the module and thus we
// need to pass the line number the error originated from.
static void resolveError(Compiler* compiler, int line, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  reportError(&(compiler->parser), compiler->parser.file_path,
              line, fmt, args);
  va_end(args); // Every va_start() must be paired with a va_end().
}
/* LEXING */
// Forward declaration of lexer methods (they are used before their
// definitions further down in this file).
static char peekChar(Parser* parser);
static char peekNextChar(Parser* parser);
static char eatChar(Parser* parser);
static void setNextValueToken(Parser* parser, TokenType type, Var value);
static void setNextToken(Parser* parser, TokenType type);
static bool matchChar(Parser* parser, char c);
// Lex a string literal whose opening quote has already been consumed and set
// it as the next token. [single_quote] tells which quote terminates the
// string. If an interpolation ('$name' or '${expr}') is found, lexing stops
// there and the part read so far is emitted as a TK_STRING_INTERP token; the
// rest of the string is lexed later by lexToken().
static void eatString(Parser* parser, bool single_quote) {
  pkByteBuffer buff;
  pkByteBufferInit(&buff); // The buffer must be initialized before writing.

  char quote = (single_quote) ? '\'' : '"';

  // For interpolated string it'll be TK_STRING_INTERP.
  TokenType tk_type = TK_STRING;

  while (true) {
    char c = eatChar(parser);

    if (c == quote) break;

    if (c == '\0') {
      lexError(parser, "Non terminated string.");

      // Null byte is required by TK_EOF, so step back onto it.
      parser->current_char--;
      break;
    }

    if (c == '$') {
      if (parser->si_depth < MAX_STR_INTERP_DEPTH) {
        tk_type = TK_STRING_INTERP;

        char next = peekChar(parser);
        if (next == '{') { // Expression interpolation (ie. "${expr}").
          eatChar(parser);
          // Enter the new depth first so that [si_depth - 1] is a valid index.
          parser->si_depth++;
          parser->si_quote[parser->si_depth - 1] = quote;
          parser->si_open_brace[parser->si_depth - 1] = 0;

        } else if (!utilIsName(next)) {
          lexError(parser, "Expected '{' or identifier after '$'.");

        } else { // Name interpolation (ie. "Hello $name!").

          // The pointer [ptr] will point to the character where the
          // interpolated name ends. (ie. the next character after the name
          // ends).
          const char* ptr = parser->current_char;
          while (utilIsName(*(ptr)) || utilIsDigit(*(ptr))) {
            ptr++;
          }
          parser->si_name_end = ptr;
          parser->si_name_quote = quote;
        }

      } else {
        lexError(parser, "Maximum interpolation level reached (can only "
                 "interpolate upto depth %d).", MAX_STR_INTERP_DEPTH);
      }

      // The string token ends at the interpolation.
      break;
    }

    if (c == '\\') {
      switch (eatChar(parser)) {
        case '"':  pkByteBufferWrite(&buff, parser->vm, '"');  break;
        case '\'': pkByteBufferWrite(&buff, parser->vm, '\''); break;
        case '\\': pkByteBufferWrite(&buff, parser->vm, '\\'); break;
        case 'n':  pkByteBufferWrite(&buff, parser->vm, '\n'); break;
        case 'r':  pkByteBufferWrite(&buff, parser->vm, '\r'); break;
        case 't':  pkByteBufferWrite(&buff, parser->vm, '\t'); break;

        // '$' In pocketlang string is used for interpolation.
        case '$':  pkByteBufferWrite(&buff, parser->vm, '$');  break;

        default:
          lexError(parser, "Error: invalid escape character");
          break;
      }
    } else {
      pkByteBufferWrite(&buff, parser->vm, c);
    }
  }

  // The terminating '\0' will be added by newStringLength().
  Var string = VAR_OBJ(newStringLength(parser->vm, (const char*)buff.data,
                                       (uint32_t)buff.count));

  pkByteBufferClear(&buff, parser->vm);

  setNextValueToken(parser, tk_type, string);
}
// Returns the char the parser is currently on, without consuming it.
static char peekChar(Parser* parser) {
  return *parser->current_char;
}
// Returns the char after the current one, without consuming anything. If
// the current char is the end of the source ('\0'), returns '\0' rather than
// reading past the terminator.
static char peekNextChar(Parser* parser) {
  if (peekChar(parser) == '\0') return '\0';
  return *(parser->current_char + 1);
}
// Advance the parser by 1 char and return the char that was consumed. The
// current line number is incremented when a new line is consumed.
static char eatChar(Parser* parser) {
  char c = peekChar(parser);
  parser->current_char++;
  if (c == '\n') parser->current_line++;
  return c;
}
// Complete lexing an identifier name (its first char is already consumed)
// and set it as the next token: the keyword's token type if the name is a
// keyword, TK_NAME otherwise.
static void eatName(Parser* parser) {

  char c = peekChar(parser);
  while (utilIsName(c) || utilIsDigit(c)) {
    eatChar(parser);
    c = peekChar(parser);
  }

  const char* name_start = parser->token_start;
  int length = (int)(parser->current_char - name_start);

  TokenType type = TK_NAME;
  for (int i = 0; _keywords[i].identifier != NULL; i++) {
    if (_keywords[i].length == length &&
        strncmp(name_start, _keywords[i].identifier, length) == 0) {
      type = _keywords[i].tk_type;
      break; // Keywords are unique, no need to search further.
    }
  }

  setNextToken(parser, type);
}
// Complete lexing a number literal (its first char is already consumed) and
// set it as the next TK_NUMBER token. Supports binary ("0b101"), hex
// ("0xff", lower case digits only) and decimal literals with an optional
// fraction and exponent.
static void eatNumber(Parser* parser) {

#define IS_HEX_CHAR(c)            \
  (('0' <= (c) && (c) <= '9')  || \
   ('a' <= (c) && (c) <= 'f'))

#define IS_BIN_CHAR(c) (((c) == '0') || ((c) == '1'))

  Var value = VAR_NULL; // The number value.
  char c = *parser->token_start;

  // Binary literal.
  if (c == '0' && peekChar(parser) == 'b') {
    eatChar(parser); // Consume '0b'

    uint64_t bin = 0;
    c = peekChar(parser);

    // The first digit should be a binary digit.
    if (!IS_BIN_CHAR(c)) {
      lexError(parser, "Invalid binary literal.");

    } else {
      do {
        // Consume the next digit.
        c = peekChar(parser);
        if (!IS_BIN_CHAR(c)) break;
        eatChar(parser);

        // Check the length of the binary literal.
        int length = (int)(parser->current_char - parser->token_start);
        if (length > STR_BIN_BUFF_SIZE - 2) { // -2: '-\0' 0b is in both side.
          lexError(parser, "Binary literal is too long.");
          break;
        }

        // "Append" the next digit at the end.
        bin = (bin << 1) | (uint64_t)(c - '0');

      } while (true);
    }
    value = VAR_NUM((double)bin);

  // Hex literal.
  } else if (c == '0' && peekChar(parser) == 'x') {
    eatChar(parser); // Consume '0x'

    uint64_t hex = 0;
    c = peekChar(parser);

    // The first digit should be a hex digit.
    if (!IS_HEX_CHAR(c)) {
      lexError(parser, "Invalid hex literal.");

    } else {
      do {
        // Consume the next digit.
        c = peekChar(parser);
        if (!IS_HEX_CHAR(c)) break;
        eatChar(parser);

        // Check the length of the hex literal.
        int length = (int)(parser->current_char - parser->token_start);
        if (length > STR_HEX_BUFF_SIZE - 2) { // -2: '-\0' 0x is in both side.
          lexError(parser, "Hex literal is too long.");
          break;
        }

        // "Append" the next digit at the end.
        uint8_t append_val = ('0' <= c && c <= '9')
          ? (uint8_t)(c - '0')
          : (uint8_t)((c - 'a') + 10);
        hex = (hex << 4) | append_val;

      } while (true);
    }
    value = VAR_NUM((double)hex);

  } else { // Regular number literal.
    while (utilIsDigit(peekChar(parser))) {
      eatChar(parser);
    }

    if (peekChar(parser) == '.' && utilIsDigit(peekNextChar(parser))) {
      matchChar(parser, '.');
      while (utilIsDigit(peekChar(parser))) {
        eatChar(parser);
      }
    }

    // Parse if in scientific notation format (MeN == M * 10 ** N).
    if (matchChar(parser, 'e') || matchChar(parser, 'E')) {

      if (peekChar(parser) == '+' || peekChar(parser) == '-') {
        eatChar(parser);
      }

      if (!utilIsDigit(peekChar(parser))) {
        lexError(parser, "Invalid number literal.");

      } else { // Eat the exponent.
        while (utilIsDigit(peekChar(parser))) eatChar(parser);
      }
    }

    // strtod() (unlike atof()) is specified to set errno to ERANGE when the
    // value is out of range, which the check below relies on.
    errno = 0;
    value = VAR_NUM(strtod(parser->token_start, NULL));
    if (errno == ERANGE) {
      const char* start = parser->token_start;
      int len = (int)(parser->current_char - start);
      lexError(parser, "Number literal is too large (%.*s).", len, start);
      value = VAR_NUM(0);
    }
  }

  setNextValueToken(parser, TK_NUMBER, value);

#undef IS_BIN_CHAR
#undef IS_HEX_CHAR
}
// Read and ignore chars till it reaches a new line or the end of the source.
// The new line itself is left unconsumed.
static void skipLineComment(Parser* parser) {
  char c;
  while ((c = peekChar(parser)) != '\0') {
    // Don't eat the new line, it's not part of the comment.
    if (c == '\n') return;
    eatChar(parser);
  }
}
// If the current char is [c], consume it (advancing by 1 char) and return
// true, otherwise return false without consuming anything.
static bool matchChar(Parser* parser, char c) {
  if (peekChar(parser) != c) return false;
  eatChar(parser);
  return true;
}
// If the current char is [c], eat the char and set token [two] as the next
// token, otherwise set token [one].
static void setNextTwoCharToken(Parser* parser, char c, TokenType one,
                                TokenType two) {
  if (matchChar(parser, c)) {
    setNextToken(parser, two);
  } else {
    setNextToken(parser, one);
  }
}
// Initialize the next token as [type], spanning from the token start to the
// current char.
static void setNextToken(Parser* parser, TokenType type) {
  Token* next = &parser->next;
  next->type = type;
  next->start = parser->token_start;
  next->length = (int)(parser->current_char - parser->token_start);

  // eatChar() has already incremented the line counter when it consumed the
  // new line, so a TK_LINE token belongs to the previous line.
  next->line = parser->current_line - ((type == TK_LINE) ? 1 : 0);
}
// Initialize the next token as [type] and assign [value] as its literal
// value.
static void setNextValueToken(Parser* parser, TokenType type, Var value) {
  setNextToken(parser, type);
  parser->next.value = value;
}
// Lex the next token and set it as the next token. The tokens are shifted
// first: [current] becomes [previous] and [next] becomes [current]. Once
// TK_EOF becomes the current token nothing more is lexed.
static void lexToken(Parser* parser) {
  parser->previous = parser->current;
  parser->current = parser->next;

  if (parser->current.type == TK_EOF) return;

  while (peekChar(parser) != '\0') {
    parser->token_start = parser->current_char;

    // If we're parsing a name interpolation and the current character is where
    // the name ends, continue parsing the string.
    //
    //        "Hello $name!"
    //                    ^-- si_name_end
    //
    if (parser->si_name_end != NULL) {
      if (parser->current_char == parser->si_name_end) {
        parser->si_name_end = NULL;
        eatString(parser, parser->si_name_quote == '\'');
        return;
      } else {
        ASSERT(parser->current_char < parser->si_name_end, OOPS);
      }
    }

    char c = eatChar(parser);
    switch (c) {

      case '{': {
        // If we're inside an interpolation, increase the open brace count
        // of the current depth.
        if (parser->si_depth > 0) {
          parser->si_open_brace[parser->si_depth - 1]++;
        }
        setNextToken(parser, TK_LBRACE);
        return;
      }

      case '}': {
        // If we're inside of an interpolated string.
        if (parser->si_depth > 0) {

          // No open braces, then end the expression and complete the string.
          if (parser->si_open_brace[parser->si_depth - 1] == 0) {

            char quote = parser->si_quote[parser->si_depth - 1];
            parser->si_depth--; //< Exit the depth.
            eatString(parser, quote == '\'');
            return;

          } else { // Decrease the open brace at the current depth.
            parser->si_open_brace[parser->si_depth - 1]--;
          }
        }

        setNextToken(parser, TK_RBRACE);
        return;
      }

      case ',': setNextToken(parser, TK_COMMA); return;
      case ':': setNextToken(parser, TK_COLLON); return;
      case ';': setNextToken(parser, TK_SEMICOLLON); return;

      // A comment produces no token: skip it and keep lexing.
      case '#': skipLineComment(parser); break;

      case '(': setNextToken(parser, TK_LPARAN); return;
      case ')': setNextToken(parser, TK_RPARAN); return;
      case '[': setNextToken(parser, TK_LBRACKET); return;
      case ']': setNextToken(parser, TK_RBRACKET); return;

      case '%':
        setNextTwoCharToken(parser, '=', TK_PERCENT, TK_MODEQ);
        return;

      case '~': setNextToken(parser, TK_TILD); return;

      case '&':
        setNextTwoCharToken(parser, '=', TK_AMP, TK_ANDEQ);
        return;

      case '|':
        setNextTwoCharToken(parser, '=', TK_PIPE, TK_OREQ);
        return;

      case '^':
        setNextTwoCharToken(parser, '=', TK_CARET, TK_XOREQ);
        return;

      case '\n': setNextToken(parser, TK_LINE); return;

      // White space produces no token: skip the whole run and keep lexing.
      case ' ':
      case '\t':
      case '\r': {
        c = peekChar(parser);
        while (c == ' ' || c == '\t' || c == '\r') {
          eatChar(parser);
          c = peekChar(parser);
        }
        break;
      }

      case '.':
        if (matchChar(parser, '.')) {
          setNextToken(parser, TK_DOTDOT); // '..'
        } else if (utilIsDigit(peekChar(parser))) {
          // The '.' itself was consumed above; this eats the first digit of
          // the fraction and eatNumber() lexes the remainder.
          eatChar(parser);
          eatNumber(parser);
        } else {
          setNextToken(parser, TK_DOT);    // '.'
        }
        return;

      case '=':
        setNextTwoCharToken(parser, '=', TK_EQ, TK_EQEQ);
        return;

      case '!':
        setNextTwoCharToken(parser, '=', TK_NOT, TK_NOTEQ);
        return;

      case '>':
        if (matchChar(parser, '>')) {
          if (matchChar(parser, '=')) {
            setNextToken(parser, TK_SRIGHTEQ);
          } else {
            setNextToken(parser, TK_SRIGHT);
          }
        } else {
          setNextTwoCharToken(parser, '=', TK_GT, TK_GTEQ);
        }
        return;

      case '<':
        if (matchChar(parser, '<')) {
          if (matchChar(parser, '=')) {
            setNextToken(parser, TK_SLEFTEQ);
          } else {
            setNextToken(parser, TK_SLEFT);
          }
        } else {
          setNextTwoCharToken(parser, '=', TK_LT, TK_LTEQ);
        }
        return;

      case '+':
        setNextTwoCharToken(parser, '=', TK_PLUS, TK_PLUSEQ);
        return;

      case '-':
        if (matchChar(parser, '=')) {
          setNextToken(parser, TK_MINUSEQ);  // '-='
        } else if (matchChar(parser, '>')) {
          setNextToken(parser, TK_ARROW);    // '->'
        } else {
          setNextToken(parser, TK_MINUS);    // '-'
        }
        return;

      case '*':
        setNextTwoCharToken(parser, '=', TK_STAR, TK_STAREQ);
        return;

      case '/':
        setNextTwoCharToken(parser, '=', TK_FSLASH, TK_DIVEQ);
        return;

      case '"': eatString(parser, false); return;

      case '\'': eatString(parser, true); return;

      default: {

        if (utilIsDigit(c)) {
          eatNumber(parser);

        } else if (utilIsName(c)) {
          eatName(parser);

        } else {
          // Printable ASCII is shown as a character, anything else as a byte.
          if (c >= 32 && c <= 126) {
            lexError(parser, "Invalid character '%c'", c);
          } else {
            lexError(parser, "Invalid byte 0x%x", (uint8_t)c);
          }
          setNextToken(parser, TK_ERROR);
        }
        return;
      }
    }
  }

  // Reached the end of the source.
  setNextToken(parser, TK_EOF);
  parser->next.start = parser->current_char;
}
// Returns the current token's type without lexing a new token.
static TokenType peek(Compiler* compiler) {
  return compiler->parser.current.type;
}
// If the current token is [expected], consume it (lexing the next token) and
// return true, otherwise return false without consuming anything.
static bool match(Compiler* compiler, TokenType expected) {
  if (peek(compiler) != expected) return false;

  lexToken(&(compiler->parser));
  return true;
}
// Consume the current token and if it's not [expected], report [err_msg] as
// a parse error and continue parsing to find more errors.
static void consume(Compiler* compiler, TokenType expected,
                    const char* err_msg) {

  lexToken(&(compiler->parser));
  if (compiler->parser.previous.type != expected) {
    parseError(compiler, "%s", err_msg);

    // If the next token is expected discard the current to minimize
    // cascaded errors and continue parsing.
    if (peek(compiler) == expected) {
      lexToken(&(compiler->parser));
    }
  }
}
// Match one or more new lines and return true if there were any.
static bool matchLine(Compiler* compiler) {

  bool consumed = false;

  if (peek(compiler) == TK_LINE) {
    while (peek(compiler) == TK_LINE)
      lexToken(&(compiler->parser));
    consumed = true;
  }

  // If we're running in REPL mode, reached the EOF and no compile time error
  // has occurred so far, signal the host to get more lines and try
  // re-compiling it.
  if (compiler->parser.repl_mode && !compiler->parser.has_errors) {
    if (peek(compiler) == TK_EOF) {
      compiler->parser.need_more_lines = true;
    }
  }

  return consumed;
}
// Will skip multiple new lines (if there are any).
static void skipNewLines(Compiler* compiler) {
  matchLine(compiler);
}
// Match a semicolon or multiple new lines, or peek the end of the source or
// one of the 'end', 'else', 'elsif' keywords. Returns true if the statement
// has ended. The keywords (and EOF) are only peeked, never consumed.
static bool matchEndStatement(Compiler* compiler) {
  if (match(compiler, TK_SEMICOLLON)) {
    skipNewLines(compiler);
    return true;
  }

  if (matchLine(compiler) || peek(compiler) == TK_EOF)
    return true;

  // In the below statement we don't require any new lines or semicolons.
  // 'if cond then stmnt1 elsif cond2 then stmnt2 else stmnt3 end'
  if (peek(compiler) == TK_END || peek(compiler) == TK_ELSE ||
      peek(compiler) == TK_ELSIF)
    return true;

  return false;
}
// Consume a statement end (see matchEndStatement()) and report a parse error
// if the statement doesn't end here.
static void consumeEndStatement(Compiler* compiler) {
  if (!matchEndStatement(compiler)) {
    parseError(compiler, "Expected statement end with '\\n' or ';'.");
  }
}
// Match the optional "do" or "then" keyword ([delimiter]) and new lines that
// start a block. At least one of them is required, otherwise a parse error
// is reported.
static void consumeStartBlock(Compiler* compiler, TokenType delimiter) {
  bool consumed = false;

  // Match optional "do" or "then".
  if (delimiter == TK_DO || delimiter == TK_THEN) {
    if (match(compiler, delimiter))
      consumed = true;
  }

  if (matchLine(compiler))
    consumed = true;

  if (!consumed) {
    const char* msg;
    if (delimiter == TK_DO) msg = "Expected enter block with newline or 'do'.";
    else msg = "Expected enter block with newline or 'then'.";

    // Pass the message as an argument, never as the format string itself
    // (same as consume() does).
    parseError(compiler, "%s", msg);
  }
}
// Match an optional (compound) assignment operator. Returns true, with the
// operator consumed, if the current token is one; false otherwise.
static bool matchAssignment(Compiler* compiler) {
  static const TokenType assignments[] = {
    TK_EQ,
    TK_PLUSEQ, TK_MINUSEQ, TK_STAREQ, TK_DIVEQ, TK_MODEQ,
    TK_ANDEQ, TK_OREQ, TK_XOREQ,
    TK_SRIGHTEQ, TK_SLEFTEQ,
  };
  const int count = (int)(sizeof(assignments) / sizeof(assignments[0]));

  // At most one of these can be the current token, so the order of the
  // checks doesn't matter.
  for (int i = 0; i < count; i++) {
    if (match(compiler, assignments[i])) return true;
  }
  return false;
}
// Result type for an identifier definition.
typedef enum {
  NAME_NOT_DEFINED, //< The name wasn't found anywhere.
  NAME_LOCAL_VAR,   //< Including parameter.
  NAME_GLOBAL_VAR,  //< Global variable of the module.
  NAME_BUILTIN,     //< Native builtin function.
} NameDefnType;
// Identifier search result (returned by compilerSearchName()).
typedef struct {
// Where the name was found (local, global, builtin).
NameDefnType type;
// Index in the variable/function buffer/array.
int index;
// The line it was declared on.
int line;
} NameSearchResult;
// Check if the name is already defined. The search order is: locals of the
// current function (innermost first), globals of the module, then builtin
// functions. If the name isn't found the result's type is NAME_NOT_DEFINED
// and its index is -1.
static NameSearchResult compilerSearchName(Compiler* compiler,
                                           const char* name, uint32_t length) {

  NameSearchResult result;
  result.type = NAME_NOT_DEFINED;
  result.index = -1; // Don't hand uninitialized fields to the caller.
  result.line = -1;

  for (int i = compiler->local_count - 1; i >= 0; i--) {
    Local* local = &compiler->locals[i];
    ASSERT(local->depth != DEPTH_GLOBAL, OOPS);

    // Literal functions are not closures and ignore their outer function's
    // local variables: stop at the first local that belongs to an outer
    // function.
    if (compiler->func->depth >= local->depth) {
      break;
    }

    if (length == local->length) {
      if (strncmp(local->name, name, length) == 0) {
        result.type = NAME_LOCAL_VAR;
        result.index = i;
        result.line = local->line;
        return result;
      }
    }
  }

  int index; // For storing the search result below.

  // Search through globals.
  index = moduleGetGlobalIndex(compiler->module, name, length);
  if (index != -1) {
    result.type = NAME_GLOBAL_VAR;
    result.index = index;
    return result;
  }

  // Search through builtin functions.
  index = findBuiltinFunction(compiler->parser.vm, name, length);
  if (index != -1) {
    result.type = NAME_BUILTIN;
    result.index = index;
    return result;
  }

  return result;
}
// Forward declaration of codegen functions. They're defined further down in
// this file but the expression compilers that come first need them.
static void emitOpcode(Compiler* compiler, Opcode opcode);
static int emitByte(Compiler* compiler, int byte);
static int emitShort(Compiler* compiler, int arg);
static void emitLoopJump(Compiler* compiler);
static void emitAssignment(Compiler* compiler, TokenType assignment);
static void emitFunctionEnd(Compiler* compiler);
static void patchJump(Compiler* compiler, int addr_index);
static void patchListSize(Compiler* compiler, int size_index, int size);
static void patchForward(Compiler* compiler, Fn* fn, int index, int name);
static int compilerAddConstant(Compiler* compiler, Var value);
static int compilerAddVariable(Compiler* compiler, const char* name,
uint32_t length, int line);
static void compilerAddForward(Compiler* compiler, int instruction, Fn* fn,
const char* name, int length, int line);
static void compilerChangeStack(Compiler* compiler, int num);
// Forward declaration of grammar functions. The expr*() functions are the
// prefix/infix handlers registered in the rules table below.
static void parsePrecedence(Compiler* compiler, Precedence precedence);
static int compileFunction(Compiler* compiler, FuncType fn_type);
static void compileExpression(Compiler* compiler);
static void exprLiteral(Compiler* compiler);
static void exprInterpolation(Compiler* compiler);
static void exprFunc(Compiler* compiler);
static void exprName(Compiler* compiler);
static void exprOr(Compiler* compiler);
static void exprAnd(Compiler* compiler);
static void exprBinaryOp(Compiler* compiler);
static void exprUnaryOp(Compiler* compiler);
static void exprGrouping(Compiler* compiler);
static void exprList(Compiler* compiler);
static void exprMap(Compiler* compiler);
static void exprCall(Compiler* compiler);
static void exprAttrib(Compiler* compiler);
static void exprSubscript(Compiler* compiler);
// true, false, null, self.
static void exprValue(Compiler* compiler);
// The grammar rule table. It's indexed by the token type (see getRule()), so
// the order of the entries has to follow the order of the TokenType enum.
// Each entry holds the handler used when the token starts an expression
// (prefix), the handler used when it follows an expression (infix) and the
// precedence of the token as an infix operator.
//
// NOTE(review): TK_NOT is the only entry here with a NULL infix handler and a
// precedence other than NO_INFIX. Confirm parsePrecedence() can never pick it
// as an infix operator.
GrammarRule rules[] = { // Prefix Infix Infix Precedence
/* TK_DOT */ { NULL, exprAttrib, PREC_ATTRIB },
/* TK_DOTDOT */ { NULL, exprBinaryOp, PREC_RANGE },
/* TK_LPARAN */ { exprGrouping, exprCall, PREC_CALL },
/* TK_LBRACKET */ { exprList, exprSubscript, PREC_SUBSCRIPT },
/* TK_LBRACE */ { exprMap, NULL, NO_INFIX },
/* TK_PERCENT */ { NULL, exprBinaryOp, PREC_FACTOR },
/* TK_TILD */ { exprUnaryOp, NULL, NO_INFIX },
/* TK_AMP */ { NULL, exprBinaryOp, PREC_BITWISE_AND },
/* TK_PIPE */ { NULL, exprBinaryOp, PREC_BITWISE_OR },
/* TK_CARET */ { NULL, exprBinaryOp, PREC_BITWISE_XOR },
/* TK_PLUS */ { NULL, exprBinaryOp, PREC_TERM },
/* TK_MINUS */ { exprUnaryOp, exprBinaryOp, PREC_TERM },
/* TK_STAR */ { NULL, exprBinaryOp, PREC_FACTOR },
/* TK_FSLASH */ { NULL, exprBinaryOp, PREC_FACTOR },
/* TK_EQ */ NO_RULE,
/* TK_GT */ { NULL, exprBinaryOp, PREC_COMPARISION },
/* TK_LT */ { NULL, exprBinaryOp, PREC_COMPARISION },
/* TK_EQEQ */ { NULL, exprBinaryOp, PREC_EQUALITY },
/* TK_NOTEQ */ { NULL, exprBinaryOp, PREC_EQUALITY },
/* TK_GTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION },
/* TK_LTEQ */ { NULL, exprBinaryOp, PREC_COMPARISION },
/* TK_SLEFT */ { NULL, exprBinaryOp, PREC_BITWISE_SHIFT },
/* TK_AS */ NO_RULE,
/* TK_FUNC */ { exprFunc, NULL, NO_INFIX },
/* TK_NULL */ { exprValue, NULL, NO_INFIX },
/* TK_IN */ { NULL, exprBinaryOp, PREC_TEST },
/* TK_AND */ { NULL, exprAnd, PREC_LOGICAL_AND },
/* TK_OR */ { NULL, exprOr, PREC_LOGICAL_OR },
/* TK_NOT */ { exprUnaryOp, NULL, PREC_UNARY },
/* TK_TRUE */ { exprValue, NULL, NO_INFIX },
/* TK_FALSE */ { exprValue, NULL, NO_INFIX },
/* TK_DO */ NO_RULE,
/* TK_IF */ NO_RULE,
/* TK_NAME */ { exprName, NULL, NO_INFIX },
/* TK_NUMBER */ { exprLiteral, NULL, NO_INFIX },
/* TK_STRING */ { exprLiteral, NULL, NO_INFIX },
/* TK_STRING_INTERP */ { exprInterpolation, NULL, NO_INFIX },
// Return the grammar rule (prefix handler, infix handler and infix
// precedence) registered for the token [type]. The token type is used as the
// index into the rules table.
static GrammarRule* getRule(TokenType type) {
  const int slot = (int)type;
  return &rules[slot];
}
// Emit the store instruction for the variable at [index]. If [global] is
// true the index refers to a module global, otherwise to a local of the
// function being compiled.
static void emitStoreVariable(Compiler* compiler, int index, bool global) {
  if (global) {
    emitOpcode(compiler, OP_STORE_GLOBAL);
    emitByte(compiler, index);
    return;
  }

  // The locals 0..8 have their own opcode each, with no operand byte.
  if (index < 9) {
    emitOpcode(compiler, (Opcode)(OP_STORE_LOCAL_0 + index));
    return;
  }

  // Any other local takes the generic opcode followed by its index.
  emitOpcode(compiler, OP_STORE_LOCAL_N);
  emitByte(compiler, index);
}
// Emit the push instruction for the variable at [index]. If [global] is true
// the index refers to a module global, otherwise to a local of the function
// being compiled.
static void emitPushVariable(Compiler* compiler, int index, bool global) {
  if (global) {
    emitOpcode(compiler, OP_PUSH_GLOBAL);
    emitByte(compiler, index);
    return;
  }

  // The locals 0..8 have their own opcode each, with no operand byte.
  if (index < 9) {
    emitOpcode(compiler, (Opcode)(OP_PUSH_LOCAL_0 + index));
    return;
  }

  // Any other local takes the generic opcode followed by its index.
  emitOpcode(compiler, OP_PUSH_LOCAL_N);
  emitByte(compiler, index);
}
// Compile a literal token (number or string). The value carried by the
// previous token is added to the module's constants and an instruction to
// push that constant is emitted.
static void exprLiteral(Compiler* compiler) {
  const Token* literal = &compiler->parser.previous;
  const int constant = compilerAddConstant(compiler, literal->value);

  emitOpcode(compiler, OP_PUSH_CONSTANT);
  emitShort(compiler, constant);
}
// Compile an interpolated string. Consider the below string.
// "Hello $name!"
// This will be compiled as:
// list_join(["Hello ", name, "!"])
static void exprInterpolation(Compiler* compiler) {
// Push the builtin "list_join" function, it'll be called with the list at
// the end.
emitOpcode(compiler, OP_PUSH_BUILTIN_FN);
emitByte(compiler, compiler->bifn_list_join);
// Push an empty list. Its size operand is a placeholder here and will be
// patched (see patchListSize below) once the parts were compiled.
emitOpcode(compiler, OP_PUSH_LIST);
int size_index = emitShort(compiler, 0);
int size = 0;
do {
// Push the string on the stack and append it to the list.
emitOpcode(compiler, OP_LIST_APPEND);
// Compile the expression and append it to the list.
emitOpcode(compiler, OP_LIST_APPEND);
} while (match(compiler, TK_STRING_INTERP));
// The last string is not TK_STRING_INTERP but it would be
// TK_STRING. Append it.
// Optimize case last string could be empty. Skip it.
consume(compiler, TK_STRING, "Non terminated interpolated string.");
if (compiler->parser.previous.type == TK_STRING /* != if syntax error. */) {
ASSERT(IS_OBJ_TYPE(compiler->parser.previous.value, OBJ_STRING), OOPS);
String* str = (String*)AS_OBJ(compiler->parser.previous.value);
if (str->length != 0) {
emitOpcode(compiler, OP_LIST_APPEND);
patchListSize(compiler, size_index, size);
// Call the list_join function (which is at the stack top).
emitOpcode(compiler, OP_CALL);
emitByte(compiler, 1);
// After the above call, the list and the "list_join" function will be popped
// from the stack and a string will be pushed. So the resulting stack effect
// is -1.
compilerChangeStack(compiler, -1);
// Compile a function literal. The function is compiled by compileFunction()
// and an instruction to push the constant at the index it returned is
// emitted.
static void exprFunc(Compiler* compiler) {
  const int literal_fn = compileFunction(compiler, FN_LITERAL);

  emitOpcode(compiler, OP_PUSH_CONSTANT);
  emitShort(compiler, literal_fn);
}
// Compile an identifier expression. The name (the previous token) is looked
// up with compilerSearchName() and then:
// - if it's not defined and it's followed by '=' where an assignment is
// allowed, a new variable is declared.
// - if it's not defined otherwise, it's an error at the global depth and an
// implicit forward reference to a global at a local depth.
// - if it's a local or a global, it's stored/pushed.
// - if it's a builtin function, it's pushed.
static void exprName(Compiler* compiler) {
const char* start = compiler->parser.previous.start;
int length = compiler->parser.previous.length;
int line = compiler->parser.previous.line;
NameSearchResult result = compilerSearchName(compiler, start, length);
if (result.type == NAME_NOT_DEFINED) {
if (compiler->l_value && match(compiler, TK_EQ)) {
int index = compilerAddVariable(compiler, start, length, line);
// Compile the assigned value.
// Store the value to the variable.
if (compiler->scope_depth == DEPTH_GLOBAL) {
emitStoreVariable(compiler, index, true);
} else {
// This will prevent the assignment from being popped out from the
// stack since the assigned value itself is the local and not a temp.
compiler->new_local = true;
// Ensure the local variable's index is equal to the stack top index.
// If the compiler has errors, we cannot and don't have to assert.
ASSERT(compiler->parser.has_errors ||
(compiler->stack_size - 1) == index, OOPS);
// We don't need to call emitStoreVariable (which emits STORE_LOCAL)
// because the local is already at its location in the stack, we just
// don't pop it.
} else {
// The name could be a global value which hasn't been defined at this
// point. We add an implicit forward declaration and once this expression
// executed the value could be initialized only if the expression is at
// a local depth.
if (compiler->scope_depth == DEPTH_GLOBAL) {
parseError(compiler, "Name '%.*s' is not defined.", length, start);
} else {
// The operand 0xff is a placeholder, the index of that byte is
// registered as a forward reference to be resolved later.
emitOpcode(compiler, OP_PUSH_GLOBAL);
int index = emitByte(compiler, 0xff);
compilerAddForward(compiler, index, _FN, start, length, line);
} else {
switch (result.type) {
const bool is_global = result.type == NAME_GLOBAL_VAR;
if (compiler->l_value && matchAssignment(compiler)) {
TokenType assignment = compiler->parser.previous.type;
if (assignment != TK_EQ) {
// Compound assignment (ex: +=), the current value of the variable is
// pushed first and the operator is applied by emitAssignment().
emitPushVariable(compiler, result.index, is_global);
emitAssignment(compiler, assignment);
} else {
emitStoreVariable(compiler, result.index, is_global);
} else {
emitPushVariable(compiler, result.index, is_global);
emitOpcode(compiler, OP_PUSH_BUILTIN_FN);
emitByte(compiler, result.index);
UNREACHABLE(); // Case already handled.
// Compiling (expr a) or (expr b)
// (expr a)
// | At this point (expr a) is at the stack top.
// V
// .-- (OP_OR [offset])
// | | if true short circuit and skip (expr b)
// | | otherwise pop (expr a) and continue.
// | V
// | (expr b)
// | | At this point (expr b) is at the stack top.
// | V
// '-> (...)
// At this point stack top would be
// either (expr a) or (expr b)
// Compiling 'and' expression is also similar but we jump if the (expr a) is
// false.
//
// NOTE(review): the forward declaration of this function is 'static' but the
// definition below omits it. The linkage stays internal either way, but it
// reads inconsistently next to the other expr*() definitions.
void exprOr(Compiler* compiler) {
emitOpcode(compiler, OP_OR);
// The jump offset isn't known till the right operand is compiled.
int orpatch = emitShort(compiler, 0xffff); //< Will be patched.
parsePrecedence(compiler, PREC_LOGICAL_OR);
patchJump(compiler, orpatch);
// Compile the right hand side of an 'and' expression. It emits OP_AND with a
// placeholder jump offset, compiles the right operand and then patches the
// offset to point after it.
//
// NOTE(review): the forward declaration of this function is 'static' but the
// definition below omits it. The linkage stays internal either way, but it
// reads inconsistently next to the other expr*() definitions.
void exprAnd(Compiler* compiler) {
emitOpcode(compiler, OP_AND);
// The jump offset isn't known till the right operand is compiled.
int andpatch = emitShort(compiler, 0xffff); //< Will be patched.
parsePrecedence(compiler, PREC_LOGICAL_AND);
patchJump(compiler, andpatch);
// Compile a binary operator expression. The left operand is already compiled
// when this is called and the operator is the previous token. It compiles
// the right operand and then emits the opcode of the operator.
static void exprBinaryOp(Compiler* compiler) {
TokenType op = compiler->parser.previous.type;
// The right operand is parsed with a precedence one level above the
// operator's own, so an operator of the same precedence won't be consumed
// as a part of it.
parsePrecedence(compiler, (Precedence)(getRule(op)->precedence + 1));
switch (op) {
case TK_DOTDOT: emitOpcode(compiler, OP_RANGE); break;
case TK_PERCENT: emitOpcode(compiler, OP_MOD); break;
case TK_AMP: emitOpcode(compiler, OP_BIT_AND); break;
case TK_PIPE: emitOpcode(compiler, OP_BIT_OR); break;
case TK_CARET: emitOpcode(compiler, OP_BIT_XOR); break;
case TK_PLUS: emitOpcode(compiler, OP_ADD); break;
case TK_MINUS: emitOpcode(compiler, OP_SUBTRACT); break;
case TK_STAR: emitOpcode(compiler, OP_MULTIPLY); break;
case TK_FSLASH: emitOpcode(compiler, OP_DIVIDE); break;
case TK_GT: emitOpcode(compiler, OP_GT); break;
case TK_LT: emitOpcode(compiler, OP_LT); break;
case TK_EQEQ: emitOpcode(compiler, OP_EQEQ); break;
case TK_NOTEQ: emitOpcode(compiler, OP_NOTEQ); break;
case TK_GTEQ: emitOpcode(compiler, OP_GTEQ); break;
case TK_LTEQ: emitOpcode(compiler, OP_LTEQ); break;
case TK_SRIGHT: emitOpcode(compiler, OP_BIT_RSHIFT); break;
case TK_SLEFT: emitOpcode(compiler, OP_BIT_LSHIFT); break;
case TK_IN: emitOpcode(compiler, OP_IN); break;
// Compile a unary operator expression. The operator is the previous token.
// It compiles the operand and then emits the opcode of the operator.
static void exprUnaryOp(Compiler* compiler) {
TokenType op = compiler->parser.previous.type;
parsePrecedence(compiler, (Precedence)(PREC_UNARY + 1));
switch (op) {
case TK_TILD: emitOpcode(compiler, OP_BIT_NOT); break;
case TK_MINUS: emitOpcode(compiler, OP_NEGATIVE); break;
case TK_NOT: emitOpcode(compiler, OP_NOT); break;
// Compile a parenthesized expression. It's the prefix handler of '(' and it
// requires the matching ')' after the expression.
static void exprGrouping(Compiler* compiler) {
consume(compiler, TK_RPARAN, "Expected ')' after expression.");
// Compile a list literal. It's the prefix handler of '['. It emits
// OP_PUSH_LIST with a placeholder size, an OP_LIST_APPEND for each element
// and then patches the size operand.
static void exprList(Compiler* compiler) {
emitOpcode(compiler, OP_PUSH_LIST);
int size_index = emitShort(compiler, 0);
int size = 0;
do {
// Stop at ']', it'll be an empty list or a trailing comma.
if (peek(compiler) == TK_RBRACKET) break;
emitOpcode(compiler, OP_LIST_APPEND);
} while (match(compiler, TK_COMMA));
consume(compiler, TK_RBRACKET, "Expected ']' after list elements.");
patchListSize(compiler, size_index, size);
// Compile a map literal. It's the prefix handler of '{'. It emits OP_PUSH_MAP
// and an OP_MAP_INSERT for each "key : value" entry.
static void exprMap(Compiler* compiler) {
emitOpcode(compiler, OP_PUSH_MAP);
do {
// Stop at '}', it'll be an empty map or a trailing comma.
if (peek(compiler) == TK_RBRACE) break;
consume(compiler, TK_COLLON, "Expected ':' after map's key.");
emitOpcode(compiler, OP_MAP_INSERT);
} while (match(compiler, TK_COMMA));
consume(compiler, TK_RBRACE, "Expected '}' after map elements.");
// Compile a call expression. It's the infix handler of '(' so the callable
// is already compiled when this is called. It emits OP_CALL followed by the
// number of arguments as a single byte.
static void exprCall(Compiler* compiler) {
// Compile parameters.
int argc = 0;
if (!match(compiler, TK_RPARAN)) {
do {
} while (match(compiler, TK_COMMA));
consume(compiler, TK_RPARAN, "Expected ')' after parameter list.");
emitOpcode(compiler, OP_CALL);
emitByte(compiler, argc);
// After the call the arguments will be popped and the callable
// will be replaced with the return value.
compilerChangeStack(compiler, -argc);
// Compile an attribute access or an attribute assignment. It's the infix
// handler of '.'. The attribute name is added to the module's names buffer
// and its index is the 2 bytes operand of the emitted instruction.
static void exprAttrib(Compiler* compiler) {
consume(compiler, TK_NAME, "Expected an attribute name after '.'.");
const char* name = compiler->parser.previous.start;
int length = compiler->parser.previous.length;
// Store the name in module's names buffer.
int index = moduleAddName(compiler->module, compiler->parser.vm,
name, length);
if (compiler->l_value && matchAssignment(compiler)) {
TokenType assignment = compiler->parser.previous.type;
if (assignment != TK_EQ) {
// Compound assignment (ex: +=), get the current value of the attribute
// with the KEEP variant of the opcode and apply the operator on it.
emitOpcode(compiler, OP_GET_ATTRIB_KEEP);
emitShort(compiler, index);
emitAssignment(compiler, assignment);
} else {
emitOpcode(compiler, OP_SET_ATTRIB);
emitShort(compiler, index);
} else {
emitOpcode(compiler, OP_GET_ATTRIB);
emitShort(compiler, index);
// Compile a subscript access or a subscript assignment. It's the infix
// handler of '[' and it requires the matching ']' after the key.
static void exprSubscript(Compiler* compiler) {
consume(compiler, TK_RBRACKET, "Expected ']' after subscription ends.");
if (compiler->l_value && matchAssignment(compiler)) {
TokenType assignment = compiler->parser.previous.type;
if (assignment != TK_EQ) {
// Compound assignment (ex: +=), get the current value with the KEEP
// variant of the opcode and apply the operator on it.
emitOpcode(compiler, OP_GET_SUBSCRIPT_KEEP);
emitAssignment(compiler, assignment);
} else {
emitOpcode(compiler, OP_SET_SUBSCRIPT);
} else {
emitOpcode(compiler, OP_GET_SUBSCRIPT);
// Compile a keyword value (null, true, false). The keyword is the previous
// token and the matching push opcode is emitted for it.
static void exprValue(Compiler* compiler) {
TokenType op = compiler->parser.previous.type;
switch (op) {
case TK_NULL: emitOpcode(compiler, OP_PUSH_NULL); break;
case TK_TRUE: emitOpcode(compiler, OP_PUSH_TRUE); break;
case TK_FALSE: emitOpcode(compiler, OP_PUSH_FALSE); break;
// Parse an expression whose operators have a precedence of [precedence] or
// higher. The prefix handler of the previous token is looked up first (it's
// an error if it doesn't have one), then the infix handlers are looked up as
// long as the precedence of the current token is high enough.
//
// NOTE(review): the infix handler fetched from the rule could be NULL (the
// TK_NOT rule has a precedence but no infix handler). Confirm it's checked
// before it's invoked.
static void parsePrecedence(Compiler* compiler, Precedence precedence) {
GrammarFn prefix = getRule(compiler->parser.previous.type)->prefix;
if (prefix == NULL) {
parseError(compiler, "Expected an expression.");
// An assignment is only allowed if we're parsing at the lowest precedence.
compiler->l_value = precedence <= PREC_LOWEST;
// The above expression cannot be a call '(', since call is an infix
// operator. But could be true (ex: x = f()). we set is_last_call to false
// here and if the next infix operator is call this will be set to true
// once the call expression is parsed.
compiler->is_last_call = false;
while (getRule(compiler->parser.current.type)->precedence >= precedence) {
TokenType op = compiler->parser.previous.type;
GrammarFn infix = getRule(op)->infix;
// TK_LPARAN '(' as infix is the call operator.
compiler->is_last_call = (op == TK_LPARAN);
// Declare a new variable in the current scope and return its index. At the
// global depth the variable is added to the module's globals (initialized
// with null), otherwise it's added to the compiler's locals. It's assumed
// that the name is unique and not defined before in the current scope. If
// the maximum number of variables is already reached, it'll report a compile
// time error and return -1.
static int compilerAddVariable(Compiler* compiler, const char* name,
                               uint32_t length, int line) {

  // TODO: should I validate the name for pre-defined, etc?

  const bool is_global = (compiler->scope_depth == DEPTH_GLOBAL);

  // Globals and locals are counted separately but they share the same limit.
  const bool limit_reached = is_global
    ? (compiler->module->globals.count >= MAX_VARIABLES)
    : (compiler->local_count >= MAX_VARIABLES);

  if (limit_reached) {
    parseError(compiler, "A module should contain at most %d %s.",
               MAX_VARIABLES, is_global ? "globals" : "locals");
    return -1;
  }

  if (is_global) {
    return (int)moduleAddGlobal(compiler->parser.vm, compiler->module,
                                name, length, VAR_NULL);
  }

  // Take the next free slot of the locals.
  const int slot = compiler->local_count++;
  Local* entry = &compiler->locals[slot];
  entry->name = name;
  entry->length = length;
  entry->depth = compiler->scope_depth;
  entry->line = line;
  return slot;
}
// Register an implicit forward reference: a name that was used before it's
// defined. [instruction] is the index of the operand byte in the opcodes of
// [fn] that has to be patched once the name is resolved. [name], [length]
// and [line] describe the name where it was used. It reports a compile time
// error if MAX_FORWARD_NAMES entries are already registered.
static void compilerAddForward(Compiler* compiler, int instruction, Fn* fn,
const char* name, int length, int line) {
if (compiler->parser.forwards_count == MAX_FORWARD_NAMES) {
parseError(compiler, "A module should contain at most %d implicit forward "
"function declarations.", MAX_FORWARD_NAMES);
ForwardName* forward = &compiler->parser.forwards[
forward->instruction = instruction;
forward->func = fn;
forward->name = name;
forward->length = length;
forward->line = line;
// Add a literal constant to the module's constants and return its index. If
// the index doesn't fit in the limit of MAX_CONSTANTS (the operand of the
// instruction is a short), it'll report a compile time error. The index is
// returned in either case.
static int compilerAddConstant(Compiler* compiler, Var value) {
  uint32_t index = moduleAddConstant(compiler->parser.vm,
                                     compiler->module, value);
  if (index >= MAX_CONSTANTS) {
    parseError(compiler, "A module should contain at most %d "
               "unique constants.", MAX_CONSTANTS);
  }
  return (int)index;
}
// Enters inside a block. It's paired with compilerExitBlock(), which discards
// the locals of the scope that's being left.
static void compilerEnterBlock(Compiler* compiler) {
// Adjust the stack size tracked by the compiler by [num] (it grows if [num]
// is positive and shrinks if it's negative) and update the maximum stack
// size that was recorded for the function being compiled.
static void compilerChangeStack(Compiler* compiler, int num) {
  compiler->stack_size += num;

  // The size can only go negative if the compiler has errors: a value which
  // wasn't pushed because of a semantic error (such as an undefined name)
  // will still be popped once the expression parsing is done.
  ASSERT(compiler->parser.has_errors || compiler->stack_size >= 0, OOPS);

  // Remember the highest size that was reached.
  if (_FN->stack_size < compiler->stack_size) {
    _FN->stack_size = compiler->stack_size;
  }
}
// Write the instructions to pop all the locals at the given [depth] or
// deeper. It won't change the tracked stack size or the locals count because
// this function is called by break/continue statements at the middle of a
// scope, so we need those locals till the scope ends. It returns the number
// of locals that were popped.
static int compilerPopLocals(Compiler* compiler, int depth) {
ASSERT(depth > (int)DEPTH_GLOBAL, "Cannot pop global variables.");
// Start from the last local and go backward till a local of an outer depth.
int local = compiler->local_count - 1;
while (local >= 0 && compiler->locals[local].depth >= depth) {
// Note: Do not use emitOpcode(compiler, OP_POP);
// Because this function is called at the middle of a scope (break,
// continue). So we need the pop instruction here but we still need the
// locals to continue parsing the next statements in the scope. They'll be
// popped once the scope is ended.
emitByte(compiler, OP_POP);
return (compiler->local_count - 1) - local;
// Exits a block. It emits the instructions to pop the locals of the current
// scope and removes them from the compiler's locals count and tracked stack
// size.
static void compilerExitBlock(Compiler* compiler) {
ASSERT(compiler->scope_depth > (int)DEPTH_GLOBAL, "Cannot exit toplevel.");
// Discard all the locals at the current scope.
int popped = compilerPopLocals(compiler, compiler->scope_depth);
compiler->local_count -= popped;
compiler->stack_size -= popped;
// Make [fn] the function context being compiled. [fn] is a node provided by
// the caller, it's filled with the function object [func], the index of the
// function [index] and the current scope depth, and it's linked to the
// context that was active so compilerPopFunc() can restore it.
static void compilerPushFunc(Compiler* compiler, Func* fn,
                             Function* func, int index) {
  Func* enclosing = compiler->func;

  fn->ptr = func;
  fn->index = index;
  fn->depth = compiler->scope_depth;
  fn->outer_func = enclosing;

  compiler->func = fn;
}
// Leave the function context being compiled and make the one that was
// active when it was pushed (its outer function) the current one again.
static void compilerPopFunc(Compiler* compiler) {
  Func* enclosing = compiler->func->outer_func;
  compiler->func = enclosing;
}
// Emit a single byte and return its index. Along with the byte written to
// the opcodes buffer of the function being compiled, an entry is written to
// its oplines buffer.
static int emitByte(Compiler* compiler, int byte) {
pkByteBufferWrite(&_FN->opcodes, compiler->parser.vm,
pkUintBufferWrite(&_FN->oplines, compiler->parser.vm,
// The byte was written at the end of the buffer.
return (int)_FN->opcodes.count - 1;
// Emit a 2 bytes argument in big endian order (the high byte goes first)
// and return the index of its first byte.
static int emitShort(Compiler* compiler, int arg) {
  const int high = (arg >> 8) & 0xff;
  const int low = arg & 0xff;

  const int start = emitByte(compiler, high);
  emitByte(compiler, low);
  return start;
}
// Emit an instruction and apply its stack effect (taken from the opcode_info
// table) to the stack size tracked by the compiler. The opcodes with a
// variable stack effect should be handled by the caller. For example, if the
// opcode is OP_CALL the caller has to change the stack size manually since
// the number of arguments isn't known here.
static void emitOpcode(Compiler* compiler, Opcode opcode) {
  const int stack_effect = opcode_info[opcode].stack;

  emitByte(compiler, (int)opcode);
  compilerChangeStack(compiler, stack_effect);
}
// Emit the instruction to jump back to the start of the current loop. The
// operand is the distance from the end of this instruction (the 2 operand
// bytes included) to the start of the loop.
static void emitLoopJump(Compiler* compiler) {
  emitOpcode(compiler, OP_LOOP);

  // The count is taken after the opcode was written, and the 2 bytes of the
  // operand that are about to be written are added to it.
  const int after_opcode = (int)_FN->opcodes.count;
  emitShort(compiler, after_opcode - compiler->loop->start + 2);
}
// Emit the binary operator instruction of a compound assignment token (ex:
// OP_ADD for '+='). The callers emit the instructions to load the current
// value and to store the result.
static void emitAssignment(Compiler* compiler, TokenType assignment) {
switch (assignment) {
case TK_PLUSEQ: emitOpcode(compiler, OP_ADD); break;
case TK_MINUSEQ: emitOpcode(compiler, OP_SUBTRACT); break;
case TK_STAREQ: emitOpcode(compiler, OP_MULTIPLY); break;
case TK_DIVEQ: emitOpcode(compiler, OP_DIVIDE); break;
case TK_MODEQ: emitOpcode(compiler, OP_MOD); break;
case TK_ANDEQ: emitOpcode(compiler, OP_BIT_AND); break;
case TK_OREQ: emitOpcode(compiler, OP_BIT_OR); break;
case TK_XOREQ: emitOpcode(compiler, OP_BIT_XOR); break;
case TK_SRIGHTEQ: emitOpcode(compiler, OP_BIT_RSHIFT); break;
case TK_SLEFTEQ: emitOpcode(compiler, OP_BIT_LSHIFT); break;
// Emit the instructions that end a function: an implicit return followed by
// the end marker.
static void emitFunctionEnd(Compiler* compiler) {
  // The return is written with emitByte() and not with emitOpcode() on
  // purpose: emitOpcode() would reduce the tracked stack size by 1 (for the
  // popped return value) but this return is implicitly added by the compiler.
  // Since we're returning from the end of the function, there'll always be a
  // null value at the reserved return value slot at the base of the current
  // call frame.
  emitByte(compiler, (int)OP_RETURN);

  emitOpcode(compiler, OP_END);
}
// Patch the 2 bytes jump offset at [addr_index] so the jump lands at the
// current end of the opcodes. The offset is counted from the byte after the
// 2 operand bytes and it's written in big endian order.
static void patchJump(Compiler* compiler, int addr_index) {
  const int target = (int)_FN->opcodes.count;
  const int offset = target - (addr_index + 2 /*bytes index*/);
  ASSERT(offset < MAX_JUMP, "Too large address offset to jump to.");

  _FN->opcodes.data[addr_index + 0] = (offset >> 8) & 0xff;
  _FN->opcodes.data[addr_index + 1] = offset & 0xff;
}
// Patch the 2 bytes size operand of an OP_PUSH_LIST instruction at
// [size_index] with [size], in big endian order.
static void patchListSize(Compiler* compiler, int size_index, int size) {
  const int high = (size >> 8) & 0xff;
  const int low = size & 0xff;

  _FN->opcodes.data[size_index + 0] = high;
  _FN->opcodes.data[size_index + 1] = low;
}
// Patch the single byte operand at [index] in the opcodes of [fn] with the
// low byte of [name]. It's used to resolve the forward references that were
// registered with compilerAddForward().
static void patchForward(Compiler* compiler, Fn* fn, int index, int name) {
  const int operand = name & 0xff;
  fn->opcodes.data[index] = operand;
}
// The kind of the block being compiled. compileBlockBody() uses it to decide
// which keyword opens the block and which tokens end it.
typedef enum {
} BlockType;
// Forward declaration of the statement compilers used by the class and
// function compilers below.
static void compileStatement(Compiler* compiler);
static void compileBlockBody(Compiler* compiler, BlockType type);
// Compile a class declaration. A new class is created with the consumed name
// and it's set as a global of the module. The fields of the class are
// compiled into its constructor function: it pushes a new instance, appends
// the value of each "name = expression" field to it and returns it.
//
// NOTE(review): this was documented as returning the index of the class in
// the module's types buffer, but it currently returns -1 (see the TODO at
// the end).
static int compileClass(Compiler* compiler) {
// Consume the name of the type.
consume(compiler, TK_NAME, "Expected a type name.");
const char* name = compiler->parser.previous.start;
int name_len = compiler->parser.previous.length;
// Create a new class.
int cls_index, ctor_index;
Class* cls = newClass(compiler->parser.vm, compiler->module,
name, (uint32_t)name_len, &cls_index, &ctor_index);
cls->ctor->arity = 0;
// Temporary patch for moving functions and classes to constant buffer.
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
int index = compilerAddVariable(compiler,
moduleSetGlobal(compiler->module, index, VAR_OBJ(cls));
// Check count exceeded.
if (cls_index >= MAX_CONSTANTS || ctor_index >= MAX_CONSTANTS) {
parseError(compiler, "A module should contain at most %d "
"unique constants.", MAX_CONSTANTS);
// Compile the constructor function.
ASSERT(compiler->func->ptr == compiler->module->body, OOPS);
Func curr_fn;
compilerPushFunc(compiler, &curr_fn, cls->ctor, ctor_index);
// Push an instance on the stack.
emitOpcode(compiler, OP_PUSH_INSTANCE);
emitShort(compiler, cls_index);
TokenType next = peek(compiler);
while (next != TK_END && next != TK_EOF) {
// Compile field name.
// NOTE(review): the message below says "type name" but a field name is
// expected here -- looks copied from the class name check above.
consume(compiler, TK_NAME, "Expected a type name.");
const char* f_name = compiler->parser.previous.start;
int f_len = compiler->parser.previous.length;
uint32_t f_index = moduleAddName(compiler->module, compiler->parser.vm,
f_name, f_len);
// Make sure a field with the same name wasn't already added to the class.
String* new_name = compiler->module->names.data[f_index];
for (uint32_t i = 0; i < cls->field_names.count; i++) {
String* prev = compiler->module->names.data[cls->field_names.data[i]];
if (IS_STR_EQ(new_name, prev)) {
parseError(compiler, "Class field with name '%s' already exists.",
pkUintBufferWrite(&cls->field_names, compiler->parser.vm, f_index);
// Consume the assignment expression.
consume(compiler, TK_EQ, "Expected an assignment after field name.");
compileExpression(compiler); // Assigned value.
// At this point the stack top would be the expression.
emitOpcode(compiler, OP_INST_APPEND);
next = peek(compiler);
consume(compiler, TK_END, "Expected 'end' after a class declaration end.");
// The instance pushed by the OP_PUSH_INSTANCE instruction is at the top
// of the stack, return it (Constructor will return the instance). Note that
// the emitFunctionEnd function will also add a return instruction but that's
// for functions which doesn't return anything explicitly. This return won't
// change compiler's stack size because it won't pop the return value.
emitOpcode(compiler, OP_RETURN);
return -1; // TODO;
// Compile a function and return its index (the fn_index of the new
// function, which is checked against MAX_CONSTANTS). [fn_type] tells what
// kind of function it is:
// - any type other than FN_LITERAL has a name, which is consumed here, and
// the function is set as a global of the module with that name.
// - FN_NATIVE functions don't have a body to compile.
static int compileFunction(Compiler* compiler, FuncType fn_type) {
const char* name;
int name_length;
if (fn_type != FN_LITERAL) {
consume(compiler, TK_NAME, "Expected a function name.");
name = compiler->parser.previous.start;
name_length = compiler->parser.previous.length;
} else {
name_length = (int)strlen(name);
int fn_index;
Function* func = newFunction(compiler->parser.vm, name, name_length,
compiler->module, fn_type == FN_NATIVE, NULL,
if (fn_index >= MAX_CONSTANTS) {
parseError(compiler, "A module should contain at most %d "
"unique constants.", MAX_CONSTANTS);
if (fn_type != FN_LITERAL) {
// FIXME: remove native keyword for functions.
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
int name_line = compiler->parser.previous.line;
int g_index = compilerAddVariable(compiler, name, name_length, name_line);
moduleSetGlobal(compiler->module, g_index, VAR_OBJ(func));
// The function becomes the current function context till it's compiled.
Func curr_fn;
compilerPushFunc(compiler, &curr_fn, func, fn_index);
int argc = 0;
compilerEnterBlock(compiler); // Parameter depth.
// Parameter list is optional.
if (match(compiler, TK_LPARAN) && !match(compiler, TK_RPARAN)) {
do {
consume(compiler, TK_NAME, "Expected a parameter name.");
const char* param_name = compiler->parser.previous.start;
uint32_t param_len = compiler->parser.previous.length;
// TODO: move this to a functions.
// Check if a parameter with the same name is already defined. Only the
// locals at the current (parameter) depth are searched.
bool predefined = false;
for (int i = compiler->local_count - 1; i >= 0; i--) {
Local* local = &compiler->locals[i];
if (compiler->scope_depth != local->depth) break;
if (local->length == param_len &&
strncmp(local->name, param_name, param_len) == 0) {
predefined = true;
if (predefined) {
parseError(compiler, "Multiple definition of a parameter.");
compilerAddVariable(compiler, param_name, param_len,
} while (match(compiler, TK_COMMA));
consume(compiler, TK_RPARAN, "Expected ')' after parameter list.");
func->arity = argc;
// The arguments are accounted for in the tracked stack size.
compilerChangeStack(compiler, argc);
if (fn_type != FN_NATIVE) {
compileBlockBody(compiler, BLOCK_FUNC);
consume(compiler, TK_END, "Expected 'end' after function definition end.");
compilerExitBlock(compiler); // Parameter depth.
} else {
compilerExitBlock(compiler); // Parameter depth.
// Forward patch are pending so we can't dump constant value that
// needs to be patched.
//dumpFunctionCode(compiler->parser.vm, compiler->func->ptr);
return fn_index;
// Compile the body of a block. The [type] of the block decides the keyword
// that opens it: 'then' for an if block, 'do' for the loops and nothing for
// a function body. The body ends at 'end' or at the end of the file, and
// also at 'else'/'elsif' if it's an if block. The token that ends the body
// is only peeked here, so it's left for the caller.
static void compileBlockBody(Compiler* compiler, BlockType type) {
if (type == BLOCK_IF) {
consumeStartBlock(compiler, TK_THEN);
} else if (type == BLOCK_ELSE) {
} else if (type == BLOCK_FUNC) {
// Function body doesn't require a 'do' or 'then' delimiter to enter.
} else {
// For/While loop block body delimiter is 'do'.
consumeStartBlock(compiler, TK_DO);
TokenType next = peek(compiler);
while (!(next == TK_END || next == TK_EOF || (
(type == BLOCK_IF) && (next == TK_ELSE || next == TK_ELSIF)))) {
next = peek(compiler);
// Import a file at the given path (first it'll be resolved from the current
// path) and return it as a module pointer. And it'll emit opcodes to push
// that module to the stack.
//
// If the resolved path is already in the VM's modules cache, the cached
// module is returned. Otherwise the source is loaded with the host's
// load_script_fn and compiled into a new module, which is added to the
// cache before it's compiled. It returns NULL if the host didn't register a
// script loading function or if loading the script failed.
static Module* importFile(Compiler* compiler, const char* path) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
PKVM* vm = compiler->parser.vm;
// Resolve the path. If the host didn't register a path resolving function,
// the path is used as it is.
PkStringPtr resolved = { path, NULL, NULL };
if (vm->config.resolve_path_fn != NULL) {
resolved = vm->config.resolve_path_fn(vm, compiler->module->path->data,
if (resolved.string == NULL) {
parseError(compiler, "Cannot resolve path '%s' from '%s'", path,
// Create new string for the resolved path. And free the resolved path.
int index = (int)moduleAddName(compiler->module, compiler->parser.vm,
resolved.string, (uint32_t)strlen(resolved.string));
String* path_name = compiler->module->names.data[index];
if (resolved.on_done != NULL) resolved.on_done(vm, resolved);
// Check if the script already compiled and cached in the PKVM.
Var entry = mapGet(vm->modules, VAR_OBJ(path_name));
if (!IS_UNDEF(entry)) {
// Push the compiled script on the stack.
emitOpcode(compiler, OP_IMPORT);
emitShort(compiler, index);
return (Module*)AS_OBJ(entry);
// The script not exists in the VM, make sure we have the script loading
// api function.
if (vm->config.load_script_fn == NULL) {
parseError(compiler, "Cannot import. The hosting application haven't "
"registered the script loading API");
return NULL;
// Load the script at the path.
PkStringPtr source = vm->config.load_script_fn(vm, path_name->data);
if (source.string == NULL) {
parseError(compiler, "Error loading script at \"%s\"", path_name->data);
return NULL;
// Make a new module and to compile it.
// NOTE(review): the temp reference presumably keeps the new module alive
// while it's being added to the cache -- confirm against the VM's GC.
Module* module = newModule(vm, path_name, false);
vmPushTempRef(vm, &module->_super); // scr.
mapSet(vm, vm->modules, VAR_OBJ(path_name), VAR_OBJ(module));
vmPopTempRef(vm); // scr.
// Push the compiled script on the stack.
emitOpcode(compiler, OP_IMPORT);
emitShort(compiler, index);
// Even if we're running on repl mode the imported module cannot run on
// repl mode.
PkCompileOptions options = pkNewCompilerOptions();
if (compiler->options) options = *compiler->options;
options.repl_mode = false;
// Compile the source to the module and clean the source.
PkResult result = compile(vm, module, source.string, &options);
if (source.on_done != NULL) source.on_done(vm, source);
if (result != PK_RESULT_SUCCESS) {
parseError(compiler, "Compilation of imported script '%s' failed",
return module;
// Import the native module from the PKVM's core_libs and it'll emit opcodes
// to push that module to the stack. [name_start] is the name of the module
// and it's [name_length] bytes long. If there's no core library with that
// name, it'll report a compile time error and return NULL.
static Module* importCoreLib(Compiler* compiler, const char* name_start,
int name_length) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
// Add the name to the module's name buffer, we need it as a key to the
// PKVM's module cache.
int index = (int)moduleAddName(compiler->module, compiler->parser.vm,
name_start, name_length);
String* module_name = compiler->module->names.data[index];
Var entry = mapGet(compiler->parser.vm->core_libs, VAR_OBJ(module_name));
if (IS_UNDEF(entry)) {
parseError(compiler, "No module named '%s' exists.", module_name->data);
return NULL;
// Push the module on the stack.
emitOpcode(compiler, OP_IMPORT);
emitShort(compiler, index);
return (Module*)AS_OBJ(entry);
// Push the imported module on the stack and return the pointer. It could be
// either core library or a local import: a name token imports a core
// library and a string token imports the file at that path. For any other
// token it'll report a compile time error and return NULL.
static inline Module* compilerImport(Compiler* compiler) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
// Get the module (from native libs or VM's cache or compile new one).
// And push it on the stack.
if (match(compiler, TK_NAME)) { //< Core library.
return importCoreLib(compiler, compiler->parser.previous.start,
} else if (match(compiler, TK_STRING)) { //< Local library.
Var var_path = compiler->parser.previous.value;
String* path = (String*)AS_OBJ(var_path);
return importFile(compiler, path->data);
// Invalid token after import/from keyword.
parseError(compiler, "Expected a module name or path to import.");
return NULL;
// Search for the name, and return its index in the globals. If it doesn't
// exist in the globals it'll add a variable to the globals entry and return
// its index. But if the name is a predefined function (cannot be modified),
// it'll set an error and return -1. [line] is the line recorded for the
// variable if it's added.
static int compilerImportName(Compiler* compiler, int line,
const char* name, uint32_t length) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
NameSearchResult result = compilerSearchName(compiler, name, length);
switch (result.type) {
return compilerAddVariable(compiler, name, length, line);
return result.index;
// TODO:
// Make it possible to override any name (ie. the syntax `print = 1`
// should pass) and allow imported entries to have the same name of
// builtin functions.
parseError(compiler, "Name '%.*s' already exists.", length, name);
return -1;
// This will be called by the compilerImportAll() function to import a single
// entry from the imported module (could be a function or global variable).
// It emits the instructions to get the attribute [name] of the module and
// to store it in a global with the same name in the current module.
static void compilerImportSingleEntry(Compiler* compiler,
const char* name, uint32_t length) {
// Special names begin with '@' (implicit main function, literal
// functions etc), skip them.
if (name[0] == SPECIAL_NAME_CHAR) return;
// Line number of the variables which will be bound to the imported symbol.
int line = compiler->parser.previous.line;
// Add the name to the **current** module's name buffer.
int name_index = (int)moduleAddName(compiler->module, compiler->parser.vm,
name, length);
// Get the global/function/class from the module.
emitOpcode(compiler, OP_GET_ATTRIB_KEEP);
emitShort(compiler, name_index);
// The store is skipped if the name couldn't be imported (index == -1), but
// the pop below is emitted either way.
int index = compilerImportName(compiler, line, name, length);
if (index != -1) emitStoreVariable(compiler, index, true);
emitOpcode(compiler, OP_POP);
// Import everything from [module], which would also be at the top of the
// stack before the instructions emitted below are executed.
//
// [module] must not be NULL and the compiler must be at the global scope
// (both asserted).
static void compilerImportAll(Compiler* compiler, Module* module) {
ASSERT(module != NULL, OOPS);
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
// Import all globals. global_names[i] is an index into the module's names
// buffer that gives the name of globals[i], hence the two counts must match.
ASSERT(module->global_names.count == module->globals.count, OOPS);
for (uint32_t i = 0; i < module->globals.count; i++) {
ASSERT(module->global_names.data[i] < module->names.count, OOPS);
const String* name = module->names.data[module->global_names.data[i]];
compilerImportSingleEntry(compiler, name->data, name->length);
// Compile a 'from' import statement. Accepted forms:
//
//   from module import *
//   from module import symbol [as alias [, symbol2 [as alias]]]
//
// Must be called at the global scope (asserted below).
static void compileFromImport(Compiler* compiler) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
// Import the library and push it on the stack. If the import failed
// lib_from would be NULL.
Module* lib_from = compilerImport(compiler);
// At this point the module would be on the stack before executing the next
// instruction.
consume(compiler, TK_IMPORT, "Expected keyword 'import'.");
if (match(compiler, TK_STAR)) {
// from math import *
if (lib_from) compilerImportAll(compiler, lib_from);
} else {
do {
// Consume the symbol name to import from the module.
consume(compiler, TK_NAME, "Expected symbol to import.");
const char* name = compiler->parser.previous.start;
uint32_t length = (uint32_t)compiler->parser.previous.length;
int line = compiler->parser.previous.line;
// Add the name of the symbol to the names buffer.
// NOTE(review): compilerImportSingleEntry() passes compiler->parser.vm as
// the second argument of moduleAddName(); that argument is absent here and
// looks like a line dropped from this copy of the file -- TODO confirm.
int name_index = (int)moduleAddName(compiler->module,
name, length);
// Don't pop the lib since it'll be used for the next entry.
emitOpcode(compiler, OP_GET_ATTRIB_KEEP);
emitShort(compiler, name_index); //< Name of the attrib.
// Check if it has an alias.
if (match(compiler, TK_AS)) {
// Consuming it will update the previous token, which would then be the
// name of the binding variable.
consume(compiler, TK_NAME, "Expected a name after 'as'.");
// Set the binding name of the imported symbol, which would be in the last
// token consumed: either the symbol itself or the name after the 'as'
// keyword.
name = compiler->parser.previous.start;
length = (uint32_t)compiler->parser.previous.length;
line = compiler->parser.previous.line;
// Get the variable to bind the imported symbol, if we already have a
// variable with that name override it, otherwise use a new variable.
int var_index = compilerImportName(compiler, line, name, length);
if (var_index != -1) emitStoreVariable(compiler, var_index, true);
emitOpcode(compiler, OP_POP);
// A comma continues the list; new lines directly after the comma are
// skipped before the next symbol is parsed.
} while (match(compiler, TK_COMMA) && (skipNewLines(compiler), true));
// Done getting all the attributes, now pop the lib from the stack.
emitOpcode(compiler, OP_POP);
// Always end the import statement.
// Compile an 'import' statement made of one or more comma separated modules,
// each optionally followed by 'as <name>'.
//
// For every module: if a binding name is available (the alias, or otherwise
// the module's own name) the module is stored in that variable; if there is
// none, everything inside the module is imported into the current module
// instead. Must be called at the global scope (asserted below).
static void compileRegularImport(Compiler* compiler) {
ASSERT(compiler->scope_depth == DEPTH_GLOBAL, OOPS);
do {
// Import the library and push it on the stack. If it cannot be imported
// the lib would be NULL, but we're not terminating here; parsing continues
// so that cascaded errors can be reported.
Module* lib = compilerImport(compiler);
// Variable to bind the imported module to (-1 means no binding variable).
int var_index = -1;
// Check if it has an alias, if so bind the variable with that name.
if (match(compiler, TK_AS)) {
// Consuming it will update the previous token, which would then be the
// name of the binding variable.
consume(compiler, TK_NAME, "Expected a name after 'as'.");
// Get the variable to bind the imported symbol, if we already have a
// variable with that name override it, otherwise use a new variable.
const char* name = compiler->parser.previous.start;
int length = compiler->parser.previous.length;
int line = compiler->parser.previous.line;
var_index = compilerImportName(compiler, line, name, length);
} else {
// If it has a module name use it as the binding variable.
// A core lib's name is its module name, but for local libs defining a
// module name is optional.
if (lib && lib->name != NULL) {
// Get the variable to bind the imported symbol, if we already have a
// variable with that name override it, otherwise use a new variable.
const char* name = lib->name->data;
uint32_t length = lib->name->length;
int line = compiler->parser.previous.line;
var_index = compilerImportName(compiler, line, name, length);
} else {
// -- Nothing to do here --
// Importing from a path which doesn't have a module name. Everything in
// it is imported and bound to variables (see below, var_index stays -1).
if (var_index != -1) {
// Bind the module itself to the variable and pop it.
emitStoreVariable(compiler, var_index, true);
emitOpcode(compiler, OP_POP);
} else {
if (lib) compilerImportAll(compiler, lib);
// Done importing everything from lib now pop the lib.
emitOpcode(compiler, OP_POP);
// A comma continues the list; new lines directly after the comma are
// skipped before the next module is parsed.
} while (match(compiler, TK_COMMA) && (skipNewLines(compiler), true));
// Compiles an expression. An expression will leave a value on top of the
// stack. Parsing starts at the lowest precedence (PREC_LOWEST).
static void compileExpression(Compiler* compiler) {
parsePrecedence(compiler, PREC_LOWEST);
// Compile an if statement, starting at its condition (the callers in this
// file match the 'if' / 'elsif' keyword before calling).
//
// [elsif] is true when this call compiles an 'elsif' branch of an enclosing
// if statement; in that case the closing 'end' keyword is not consumed here
// but left for the outermost 'if'.
static void compileIfStatement(Compiler* compiler, bool elsif) {
compileExpression(compiler); //< Condition.
emitOpcode(compiler, OP_JUMP_IF_NOT);
int ifpatch = emitShort(compiler, 0xffff); //< Will be patched.
compileBlockBody(compiler, BLOCK_IF);
if (match(compiler, TK_ELSIF)) {
// Jump past the else part.
emitOpcode(compiler, OP_JUMP);
int exit_jump = emitShort(compiler, 0xffff); //< Will be patched.
// if (false) jump here.
patchJump(compiler, ifpatch);
// The elsif is compiled as a nested if statement.
compileIfStatement(compiler, true);
patchJump(compiler, exit_jump);
} else if (match(compiler, TK_ELSE)) {
// Jump past the else part.
emitOpcode(compiler, OP_JUMP);
int exit_jump = emitShort(compiler, 0xffff); //< Will be patched.
// if (false) jump here.
patchJump(compiler, ifpatch);
compileBlockBody(compiler, BLOCK_ELSE);
patchJump(compiler, exit_jump);
} else {
// No else part: a false condition jumps straight here.
patchJump(compiler, ifpatch);
// An elsif will not consume the 'end' keyword, it is left to be consumed
// by its 'if'.
if (!elsif) {
consume(compiler, TK_END, "Expected 'end' after statement end.");
// Compile a while statement, starting at its condition, up to and including
// the closing 'end' keyword.
static void compileWhileStatement(Compiler* compiler) {
// Register this loop as the compiler's innermost loop. It records where the
// loop starts, the scope depth it was opened at and the jumps emitted by
// 'break' statements that still have to be patched.
Loop loop;
loop.start = (int)_FN->opcodes.count;
loop.patch_count = 0;
loop.outer_loop = compiler->loop;
loop.depth = compiler->scope_depth;
compiler->loop = &loop;
compileExpression(compiler); //< Condition.
emitOpcode(compiler, OP_JUMP_IF_NOT);
int whilepatch = emitShort(compiler, 0xffff); //< Will be patched.
compileBlockBody(compiler, BLOCK_LOOP);
// A false condition exits the loop by jumping here.
patchJump(compiler, whilepatch);
// Patch the jumps of the break statements.
for (int i = 0; i < compiler->loop->patch_count; i++) {
patchJump(compiler, compiler->loop->patches[i]);
// Leave this loop: the enclosing loop (if any) is the innermost one again.
compiler->loop = loop.outer_loop;
consume(compiler, TK_END, "Expected 'end' after statement end.");
// Compile a for statement of the form 'for <name> in <sequence> ... end',
// starting at the iterator name.
//
// Three variables are added for the loop: "@Sequence", "@iterator" and the
// user visible iteration variable.
static void compileForStatement(Compiler* compiler) {
consume(compiler, TK_NAME, "Expected an iterator name.");
// Unlike functions, a local variable could shadow a name.
const char* iter_name = compiler->parser.previous.start;
int iter_len = compiler->parser.previous.length;
int iter_line = compiler->parser.previous.line;
consume(compiler, TK_IN, "Expected 'in' after iterator name.");
// Compile and store sequence.
compilerAddVariable(compiler, "@Sequence", 9, iter_line); // Sequence
// Add iterator to locals. It's an increasing integer indicating that the
// current loop is the nth, starting from 0.
compilerAddVariable(compiler, "@iterator", 9, iter_line); // Iterator.
emitOpcode(compiler, OP_PUSH_0);
// Add the iteration value. It'll be updated to each element in an array or
// each character in a string etc.
compilerAddVariable(compiler, iter_name, iter_len, iter_line); // Iter value.
emitOpcode(compiler, OP_PUSH_NULL);
// Start the iteration, and check if the sequence is iterable.
emitOpcode(compiler, OP_ITER_TEST);
// Register this loop as the compiler's innermost loop, so that 'break' and
// 'continue' statements in the body refer to it.
Loop loop;
loop.start = (int)_FN->opcodes.count;
loop.patch_count = 0;
loop.outer_loop = compiler->loop;
loop.depth = compiler->scope_depth;
compiler->loop = &loop;
// Compile next iteration.
emitOpcode(compiler, OP_ITER);
int forpatch = emitShort(compiler, 0xffff);
compileBlockBody(compiler, BLOCK_LOOP);
emitLoopJump(compiler); //< Loop back to iteration.
patchJump(compiler, forpatch); //< Patch exit iteration address.
// Patch the jumps of the break statements.
for (int i = 0; i < compiler->loop->patch_count; i++) {
patchJump(compiler, compiler->loop->patches[i]);
// Leave this loop: the enclosing loop (if any) is the innermost one again.
compiler->loop = loop.outer_loop;
consume(compiler, TK_END, "Expected 'end' after statement end.");
compilerExitBlock(compiler); //< Iterator scope.
// Compiles a statement: 'break', 'continue', 'return', 'if', 'while', 'for'
// or, when none of those keywords match, an expression statement. An
// expression statement could be an assignment or a new variable declaration,
// both of which are handled here.
static void compileStatement(Compiler* compiler) {
// is_temporary will be set to true if the statement is a temporary
// expression; its value then has to be popped from the stack.
bool is_temporary = false;
// This will be set to true if the statement is an expression. It's used to
// print its value when running in REPL mode.
bool is_expression = false;
if (match(compiler, TK_BREAK)) {
if (compiler->loop == NULL) {
parseError(compiler, "Cannot use 'break' outside a loop.");
// NOTE(review): the patch limit is only enforced through ASSERT; if ASSERT
// compiles to nothing in release builds the write into loop->patches below
// is unchecked -- TODO confirm.
ASSERT(compiler->loop->patch_count < MAX_BREAK_PATCH,
"Too many break statements (" STRINGIFY(MAX_BREAK_PATCH) ")." );
// Pop all the locals at the loop's body depth.
compilerPopLocals(compiler, compiler->loop->depth + 1);
// Emit a jump with a placeholder address and remember it; the enclosing
// loop patches it once the loop has been compiled.
emitOpcode(compiler, OP_JUMP);
int patch = emitShort(compiler, 0xffff); //< Will be patched.
compiler->loop->patches[compiler->loop->patch_count++] = patch;
} else if (match(compiler, TK_CONTINUE)) {
if (compiler->loop == NULL) {
parseError(compiler, "Cannot use 'continue' outside a loop.");
// Pop all the locals at the loop's body depth.
compilerPopLocals(compiler, compiler->loop->depth + 1);
} else if (match(compiler, TK_RETURN)) {
if (compiler->scope_depth == DEPTH_GLOBAL) {
parseError(compiler, "Invalid 'return' outside a function.");
// A bare 'return' returns null.
if (matchEndStatement(compiler)) {
emitOpcode(compiler, OP_PUSH_NULL);
emitOpcode(compiler, OP_RETURN);
} else {
compileExpression(compiler); //< Return value is at stack top.
// If the last expression parsed with compileExpression() is a call
// is_last_call would be true by now.
if (compiler->is_last_call) {
// Tail call optimization is disabled in debug mode.
if (compiler->options && !compiler->options->debug) {
// Rewrite the just emitted call (the last two bytes: the opcode and its
// argument count) into a tail call.
ASSERT(_FN->opcodes.count >= 2, OOPS); // OP_CALL, argc
ASSERT(_FN->opcodes.data[_FN->opcodes.count - 2] == OP_CALL, OOPS);
_FN->opcodes.data[_FN->opcodes.count - 2] = OP_TAIL_CALL;
emitOpcode(compiler, OP_RETURN);
} else if (match(compiler, TK_IF)) {
compileIfStatement(compiler, false);
} else if (match(compiler, TK_WHILE)) {
} else if (match(compiler, TK_FOR)) {
} else {
// Expression statement. new_local is reset first so that, afterwards, it
// tells whether the expression defined a new local; if it didn't, the
// expression's value is a temporary.
compiler->new_local = false;
is_expression = true;
if (!compiler->new_local) is_temporary = true;
compiler->new_local = false;
// If running REPL mode, print the expression's evaluated value. This is
// only done for expressions compiled directly into the module's body.
if (compiler->options && compiler->options->repl_mode &&
compiler->func->ptr == compiler->module->body &&
is_expression /*&& compiler->scope_depth == DEPTH_GLOBAL*/) {
emitOpcode(compiler, OP_REPL_PRINT);
if (is_temporary) emitOpcode(compiler, OP_POP);
// Compile statements that are only valid at the top level of the module, such
// as import statements and function definitions. If we're running in REPL
// mode, a top level expression's evaluated value will be printed.
static void compileTopLevelStatement(Compiler* compiler) {
// At the top level the stack size should be 0, before and after compiling
// a top level statement, since there aren't any locals at the top level.
ASSERT(compiler->parser.has_errors || compiler->stack_size == 0, OOPS);
if (match(compiler, TK_CLASS)) {
} else if (match(compiler, TK_NATIVE)) {
compileFunction(compiler, FN_NATIVE);
} else if (match(compiler, TK_DEF)) {
compileFunction(compiler, FN_SCRIPT);
} else if (match(compiler, TK_FROM)) {
} else if (match(compiler, TK_IMPORT)) {
} else if (match(compiler, TK_MODULE)) {
// A 'module' statement found here is not the first statement of the script.
parseError(compiler, "Module name must be the first statement "
"of the script.");
} else {
// At the top level the stack size should be 0, before and after compiling
// a top level statement, since there aren't any locals at the top level.
ASSERT(compiler->parser.has_errors || compiler->stack_size == 0, OOPS);
// Compile [source] into [module] and return the result of the compilation.
//
//   vm      - the VM the module belongs to; the compiler is linked into
//             vm->compiler for the duration of the compilation.
//   module  - the module to compile into. Its body function's opcodes are
//             cleared first; a body is created if the module has none.
//   source  - the source text; a leading UTF-8 BOM is skipped.
//   options - compile options, forwarded to compilerInit().
//
// If the compilation fails, the constants, names and globals added to the
// module during this call are discarded by restoring their previous counts.
PkResult compile(PKVM* vm, Module* module, const char* source,
const PkCompileOptions* options) {
// Skip utf8 BOM if there is any.
if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
Compiler _compiler;
Compiler* compiler = &_compiler; //< Compiler pointer for quick access.
compilerInit(compiler, vm, source, module, options);
// If compiling for an imported module the vm->compiler would be the compiler
// of the module that imported this module. All the compilers are added into
// a linked list.
compiler->next_compiler = vm->compiler;
vm->compiler = compiler;
// If the module doesn't have a body by default, it was probably created by
// the native api function (pkNewModule(), which returns a module without a
// main function), so just create and add the function here.
if (module->body == NULL) moduleAddMain(vm, module);
// If we're compiling for a module that was already compiled (when running
// REPL or evaluating an expression) we don't need the old main anymore.
// Just use the globals and functions of the module and use a new body func.
pkByteBufferClear(&module->body->fn->opcodes, vm);
// Remember the count of constants, names, and globals. If the compilation
// fails, discard everything added after this point and roll back.
uint32_t constants_count = module->constants.count;
uint32_t names_count = module->names.count;
uint32_t globals_count = module->globals.count;
// The module's body is the outermost function being compiled.
Func curr_fn;
curr_fn.depth = DEPTH_MODULE;
curr_fn.ptr = module->body;
curr_fn.outer_func = NULL;
compiler->func = &curr_fn;
// Lex initial tokens. current <-- next.
if (match(compiler, TK_MODULE)) {
// If the module is running in a REPL or is compiled multiple times by the
// hosting application, the module name might already be set. In that case
// make it a compile error.
if (module->name != NULL) {
parseError(compiler, "Module name already defined.");
} else {
consume(compiler, TK_NAME, "Expected a name for the module.");
const char* name = compiler->parser.previous.start;
uint32_t len = compiler->parser.previous.length;
module->name = newStringLength(vm, name, len);
while (!match(compiler, TK_EOF)) {
// Resolve forward names (function names that are used before defined).
for (int i = 0; i < compiler->parser.forwards_count; i++) {
ForwardName* forward = &compiler->parser.forwards[i];
const char* name = forward->name;
int length = forward->length;
int index = moduleGetGlobalIndex(compiler->module, name, (uint32_t)length);
if (index != -1) {
// The name is a global of the module by now: patch the instruction that
// referenced it with the global's index.
patchForward(compiler, forward->func, forward->instruction, index);
} else {
// need_more_lines is only true for unexpected EOF errors. For syntax
// errors it'll be false by now, but this is a semantic error, so we're
// overriding it to false.
compiler->parser.need_more_lines = false;
resolveError(compiler, forward->line, "Name '%.*s' is not defined.",
length, name);
// Unlink this compiler from the VM's list of compilers.
vm->compiler = compiler->next_compiler;
// If compilation failed, discard all the invalid functions and globals.
if (compiler->parser.has_errors) {
module->constants.count = constants_count;
module->names.count = names_count;
module->globals.count = module->global_names.count = globals_count;
dumpFunctionCode(compiler->parser.vm, module->body);
// Return the compilation result.
if (compiler->parser.has_errors) {
if (compiler->parser.repl_mode && compiler->parser.need_more_lines) {
// Compile [source] into the module referenced by [module_handle] and return
// the result of compile().
//
// [module_handle] must not be NULL and must hold a module object (both
// checked with __ASSERT). If [source] has an on_done callback, it is invoked
// with the source after the compilation, whether or not it succeeded.
PkResult pkCompileModule(PKVM* vm, PkHandle* module_handle, PkStringPtr source,
const PkCompileOptions* options) {
__ASSERT(module_handle != NULL, "Argument module was NULL.");
__ASSERT(IS_OBJ_TYPE(module_handle->value, OBJ_MODULE),
"Given handle is not a module.");
Module* module = (Module*)AS_OBJ(module_handle->value);
PkResult result = compile(vm, module, source.string, options);
if (source.on_done) source.on_done(vm, source);
return result;
// Mark the objects referenced by [compiler]: the module being compiled and
// the values of the parser's current, previous and next tokens. The same is
// then done for every compiler linked through next_compiler.
void compilerMarkObjects(PKVM* vm, Compiler* compiler) {
// Mark the module which is currently being compiled.
markObject(vm, &compiler->module->_super);
// Mark the string literals (they haven't been added to the module's literal
// buffer yet).
markValue(vm, compiler->parser.current.value);
markValue(vm, compiler->parser.previous.value);
markValue(vm, compiler->parser.next.value);
// Continue with the next compiler in the linked list, if there is one.
if (compiler->next_compiler != NULL) {
compilerMarkObjects(vm, compiler->next_compiler);