pocketlang/src/compiler.c

/*
 *  Copyright (c) 2021 Thakee Nathees
 *  Licensed under: MIT License
 */

#include "compiler.h"

#include "types/name_table.h"
#include "types/gen/byte_buffer.h"
#include "utils.h"
#include "vm.h"

// The maximum number of variables (or globals, when compiling the top level
// of a script) that can be looked up from the compiling context. It's also
// limited by the opcode, which uses a single byte value to identify the local.
#define MAX_VARIABLES 256

// The maximum number of constant literals a script can contain. It's also
// limited by the opcode, which uses a short (2 bytes) value to identify one.
#define MAX_CONSTANTS (1 << 16)

// The maximum address possible to jump to. Similar limitation as above.
#define MAX_JUMP (1 << 16)

// Max number of break statements in a single loop statement to patch.
#define MAX_BREAK_PATCH 256

// Every kind of token the lexer can produce.
//
// The numeric value of each enumerator is used as an index into the grammar
// rule table (see getRule()), so the order of the enumerators here and the
// order of the rows in that table must be kept in sync.
typedef enum {

	TK_ERROR = 0,  // Lexing failed; the error has already been reported.
	TK_EOF,        // End of the source.
	TK_LINE,       // New line.

	// symbols
	TK_DOT,        // .
	TK_DOTDOT,     // ..
	TK_COMMA,       // ,
	TK_COLLON,     // :
	TK_SEMICOLLON, // ;
	TK_HASH,       // #
	TK_LPARAN,     // (
	TK_RPARAN,     // )
	TK_LBRACKET,   // [
	TK_RBRACKET,   // ]
	TK_LBRACE,     // {
	TK_RBRACE,     // }
	TK_PERCENT,    // %

	TK_TILD,       // ~
	TK_AMP,        // &
	TK_PIPE,       // |
	TK_CARET,      // ^

	TK_PLUS,       // +
	TK_MINUS,      // -
	TK_STAR,       // *
	TK_FSLASH,     // /
	TK_BSLASH,     // \.
	TK_EQ,         // =
	TK_GT,         // >
	TK_LT,         // <
	//TK_BANG,       // !  parsed as TK_NOT

	TK_EQEQ,       // ==
	TK_NOTEQ,      // !=
	TK_GTEQ,       // >=
	TK_LTEQ,       // <=

	TK_PLUSEQ,     // +=
	TK_MINUSEQ,    // -=
	TK_STAREQ,     // *=
	TK_DIVEQ,      // /=
	TK_SRIGHT,     // >>
	TK_SLEFT,      // <<

	//TODO:
	// >>= <<=
	//TK_PLUSPLUS,   // ++
	//TK_MINUSMINUS, // --
	//TK_MODEQ,      // %=
	//TK_XOREQ,      // ^=

	// Keywords.
	//TK_TYPE,     // type
	TK_IMPORT,     // import
	TK_ENUM,       // enum
	TK_DEF,        // def
	TK_NATIVE,     // native (C function declaration)
	TK_END,        // end

	TK_NULL,       // null
	TK_SELF,       // self
	TK_IS,         // is
	TK_IN,         // in
	TK_AND,        // and
	TK_OR,         // or
	TK_NOT,        // not (also lexed for a lone '!')
	TK_TRUE,       // true
	TK_FALSE,      // false

	// Type names for the 'is' test.
	// TK_NULL is already defined above.
	TK_BOOL_T,    // Bool
	TK_NUM_T,     // Num
	TK_STRING_T,  // String
	TK_ARRAY_T,   // Array
	TK_MAP_T,     // Map
	TK_RANGE_T,   // Range
	TK_FUNC_T,    // Function 
	TK_OBJ_T,     // Object (self, user data, etc.)

	TK_DO,         // do
	TK_WHILE,      // while
	TK_FOR,        // for
	TK_IF,         // if
	TK_ELIF,       // elif
	TK_ELSE,       // else
	TK_BREAK,      // break
	TK_CONTINUE,   // continue
	TK_RETURN,     // return

	TK_NAME,       // identifier

	TK_NUMBER,     // number literal
	TK_STRING,     // string literal

	/* String interpolation (reference wren-lang)
	 * but it doesn't support recursion, ex: "a \(b + "\(c)")"
	 *  "a \(b) c \(d) e"
	 * would be tokenized as:
	 *   TK_STR_INTERP  "a "
	 *   TK_NAME        b
	 *   TK_STR_INTERP  " c "
	 *   TK_NAME        d
	 *   TK_STRING     " e" */
	 // TK_STR_INTERP, //< not yet.

} TokenType;

// A single lexed token. [start] points into the source text (it is not a
// copy), so a token is only valid while the source is alive.
typedef struct {
	TokenType type;

	const char* start; //< Beginning of the token in the source.
	int length;        //< Number of chars of the token.
	int line;          //< Line number of the token (1 based).
	Var value;         //< Literal value of the token (set for value tokens).
} Token;

// One entry of the keyword table: the keyword's spelling, its length (so the
// lexer can compare against non null-terminated source text) and the token
// type it maps to.
typedef struct {
	const char* identifier; //< Keyword text (NULL marks the end of the table).
	int length;             //< Number of chars in [identifier].
	TokenType tk_type;      //< Token type emitted for this keyword.
} _Keyword;

// List of keywords mapped into their identifiers.
static _Keyword _keywords[] = {
	//{ "type",   4, TK_TYPE },
	{ "import",   6,  TK_IMPORT },
	{ "enum",     4,  TK_ENUM },
	{ "def",      3,  TK_DEF },
	{ "native",   6,  TK_NATIVE },
	{ "end",      3,  TK_END },
	{ "null",     4, TK_NULL },
	{ "self",     4, TK_SELF },
	{ "is",       2, TK_IS },
	{ "in",       2, TK_IN },
	{ "and",      3, TK_AND },
	{ "or",       2, TK_OR },
	{ "not",      3, TK_NOT },
	{ "true",     4, TK_TRUE },
	{ "false",    5, TK_FALSE },
	{ "do",       2, TK_DO },
	{ "while",    5, TK_WHILE },
	{ "for",      3, TK_FOR },
	{ "if",       2, TK_IF },
	{ "elif",     4, TK_ELIF },
	{ "else",     4, TK_ELSE },
	{ "break",    5, TK_BREAK },
	{ "continue", 8, TK_CONTINUE },
	{ "return",   6, TK_RETURN },

	// Type names.
	{ "Bool",     4, TK_BOOL_T },
	{ "Num",      3, TK_NUM_T },
	{ "String",   6, TK_STRING_T },
	{ "Array",    5, TK_ARRAY_T },
	{ "Map",      3, TK_MAP_T },
	{ "Range",    5, TK_RANGE_T },
	{ "Object",   6, TK_OBJ_T },
	{ "Function", 8, TK_FUNC_T },

	{ NULL,      (TokenType)(0) }, // Sentinal to mark the end of the array
};

// Lexer/parser state: a cursor into the source text plus a three token
// window (previous, current, next) that lexToken() shifts forward.
typedef struct {
	MSVM* vm;             //< Owner of the parser (for reporting errors, etc).

	const char* source; //< Currently compiled source.
	const char* path;   //< Path of the source.

	const char* token_start;  //< Start of the currently parsed token.
	const char* current_char; //< Current char position in the source.
	int current_line;         //< Line number of the current char.

	Token previous, current, next; //< Currently parsed tokens.

	bool has_errors;    //< True if any syntax error occurred at compile time.
} Parser;

// Compiler Types ////////////////////////////////////////////////////////////

// Precedence parsing references:
// https://en.wikipedia.org/wiki/Shunting-yard_algorithm
// TODO: I should explicitly state wren-lang as a reference "globally".

// Operator binding strength, ordered from the weakest (top) to the strongest
// (bottom). parsePrecedence() compares these values numerically, so the order
// of the enumerators is significant.
typedef enum {
	PREC_NONE,          // Token is not an infix operator.
	PREC_LOWEST,
	PREC_ASSIGNMENT,    // =
	PREC_LOGICAL_OR,    // or
	PREC_LOGICAL_AND,   // and
	PREC_LOGICAL_NOT,   // not
	PREC_EQUALITY,      // == !=
	PREC_IN,            // in
	PREC_IS,            // is
	PREC_COMPARISION,   // < > <= >=
	PREC_BITWISE_OR,    // |
	PREC_BITWISE_XOR,   // ^
	PREC_BITWISE_AND,   // &
	PREC_BITWISE_SHIFT, // << >>
	PREC_RANGE,         // ..
	PREC_TERM,          // + -
	PREC_FACTOR,        // * / %
	PREC_UNARY,         // - ! ~
	PREC_CALL,          // ()
	PREC_SUBSCRIPT,     // []
	PREC_ATTRIB,        // .index
	PREC_PRIMARY,
} Precedence;

// Signature of a grammar (expression parsing) function. [can_assign] is
// computed by parsePrecedence() and is true when the expression is being
// parsed at a precedence no higher than PREC_ASSIGNMENT.
typedef void (*GrammarFn)(Compiler* compiler, bool can_assign);

// Parsing rule for one token type (one row of the rule table).
typedef struct {
	GrammarFn prefix;      //< Called when the token starts an expression.
	GrammarFn infix;       //< Called when the token follows a sub expression.
	Precedence precedence; //< Precedence of the token as an infix operator.
} GrammarRule;

// A variable known to the compiler in the current context.
typedef struct {
	const char* name; //< Directly points into the source string.
	int length;       //< Length of the name.
	int depth;        //< The depth the local is defined in. (-1 means global)
	int line;         //< The line the variable was declared at (for debugging).
} Variable;

// Book keeping for the loop statement that is currently being compiled.
typedef struct sLoop {

	// Index of the loop's start instruction, where the execution will jump
	// back to once it reaches the loop end or a continue statement.
	int start;

	// Index of the jump-out address instruction, to patch its value once done
	// compiling the loop.
	int exit_jump;

	// Array of address indexes to patch with the break address, and the number
	// of entries used in it.
	int patches[MAX_BREAK_PATCH];
	int patch_count;

	// The outer loop of the current loop, used to set and reset the compiler's
	// current loop context.
	struct sLoop* outer_loop;

} Loop;

// State of a single compilation: the parser, the variables in scope and the
// script/function the bytecode is being emitted into.
struct Compiler {

	MSVM* vm;
	Parser parser;

	// Current depth the compiler is in: -1 means top level, 0 means function
	// level and > 0 is an inner scope.
	int scope_depth;

	Variable variables[MAX_VARIABLES]; //< Variables in the current context.
	int var_count;                     //< Number of locals in [variables].

	int stack_size; //< Current size including locals and temps.

	// TODO: compiler should mark Script* below not to be garbage collected.

	Script* script;      //< Current script.
	Loop* loop;          //< Current loop.
	Function* function;  //< Current function.
};

// Static information about one opcode, filled from the OPCODE() entries of
// "opcodes.h".
typedef struct {
	int params; //< Presumably the size of the opcode's operands - TODO confirm.
	int stack;  //< Change in stack size; emitOpcode() adds it to stack_size.
} OpInfo;

// Table of OpInfo, one entry per OPCODE() line of "opcodes.h", in the order
// they appear there. emitOpcode() indexes it directly with the Opcode value.
static OpInfo opcode_info[] = {
	// Expand every OPCODE(name, params, stack) into an initializer, dropping
	// the name.
	#define OPCODE(name, params, stack) { params, stack },
	#include "opcodes.h"
	#undef OPCODE
};

/*****************************************************************************
 * ERROR HANDLERS                                                            *
 *****************************************************************************/

// Common sink of lexError() and parseError(). It flags the parser as failed;
// forwarding the message ([file], [line], [fmt], [args]) to the host is not
// implemented yet, so for now it only trips an assertion.
static void reportError(Parser* parser, const char* file, int line,
                        const char* fmt, va_list args) {
	// TODO: parser->vm->config.error_fn(...)
	parser->has_errors = true;
	ASSERT(false, "TODO:");
}

// Report an error found in the middle of lexing. The error is attributed to
// the line the lexer is currently on (not to a token).
static void lexError(Parser* parser, const char* fmt, ...) {
	va_list vargs;

	va_start(vargs, fmt);
	reportError(parser, parser->path, parser->current_line, fmt, vargs);
	va_end(vargs);
}

// Report an error found while parsing. The error is attributed to the last
// consumed token, which is [parser->previous]. Nothing is reported if that
// token is a TK_ERROR, since lexError() would already have reported it.
static void parseError(Parser* parser, const char* fmt, ...) {
	const Token* last = &parser->previous;

	if (last->type != TK_ERROR) {
		va_list vargs;
		va_start(vargs, fmt);
		reportError(parser, parser->path, last->line, fmt, vargs);
		va_end(vargs);
	}
}

/*****************************************************************************
 * LEXING                                                                    *
 *****************************************************************************/

// Forward declarations of lexer functions that are used before they're
// defined further down in this file.

static char eatChar(Parser* parser);
static void setNextValueToken(Parser* parser, TokenType type, Var value);
static void setNextToken(Parser* parser, TokenType type);
static bool matchChar(Parser* parser, char c);
static bool matchLine(Parser* parser);

// Complete lexing a string literal. The opening '"' has already been consumed
// by the caller. Escape sequences are decoded into a temporary buffer, a new
// string object is created from it and it's set as the value of the next
// TK_STRING token.
//
// If the source ends before the closing '"' an error is reported and the
// cursor is left on the terminating null byte, so that the lexer can still
// produce TK_EOF afterwards.
static void eatString(Parser* parser) {
	ByteBuffer buff;
	byteBufferInit(&buff);

	while (true) {
		char c = eatChar(parser);

		if (c == '"') break;

		if (c == '\0') {
			lexError(parser, "Non terminated string.");

			// Null byte is required by TK_EOF.
			parser->current_char--;
			break;
		}

		if (c == '\\') {
			char escaped = eatChar(parser);

			// The source ended right after the backslash. Without this check the
			// null byte would be consumed as an (invalid) escape character and
			// the loop would keep reading past the end of the source.
			if (escaped == '\0') {
				lexError(parser, "Non terminated string.");

				// Null byte is required by TK_EOF.
				parser->current_char--;
				break;
			}

			switch (escaped) {
				case '"': byteBufferWrite(&buff, parser->vm, '"'); break;
				case '\\': byteBufferWrite(&buff, parser->vm, '\\'); break;
				case 'n': byteBufferWrite(&buff, parser->vm, '\n'); break;
				case 'r': byteBufferWrite(&buff, parser->vm, '\r'); break;
				case 't': byteBufferWrite(&buff, parser->vm, '\t'); break;

				default:
					lexError(parser, "Error: invalid escape character");
					break;
			}
		} else {
			byteBufferWrite(&buff, parser->vm, c);
		}
	}

	// The buffer isn't null terminated; its length is passed explicitly.
	Var string = VAR_OBJ(&newString(parser->vm, (const char*)buff.data,
		(uint32_t)buff.count)->_super);

	byteBufferClear(&buff, parser->vm);

	setNextValueToken(parser, TK_STRING, string);
}

// Returns the char the parser is currently on, without consuming it.
static char peekChar(Parser* parser) {
	const char* cursor = parser->current_char;
	return cursor[0];
}

// Returns the char after the one the parser is currently on, without
// consuming anything. If the parser is already at the end of the source it
// returns '\0' instead of reading past the terminator.
static char peekNextChar(Parser* parser) {
	const char* cursor = parser->current_char;
	return (*cursor == '\0') ? '\0' : cursor[1];
}

// Consume the current char and return it. The line counter is bumped when
// the consumed char is a new line. Note that the cursor is advanced
// unconditionally, even when the consumed char is the terminating '\0'.
static char eatChar(Parser* parser) {
	const char consumed = *(parser->current_char++);
	if (consumed == '\n') {
		parser->current_line += 1;
	}
	return consumed;
}

// Complete lexing an identifier. The first char was consumed by the caller;
// this consumes the remaining name/digit chars and then sets the next token
// either to the matching keyword's type or to TK_NAME.
static void eatName(Parser* parser) {

	for (char ch = peekChar(parser);
	     utilIsName(ch) || utilIsDigit(ch);
	     ch = peekChar(parser)) {
		eatChar(parser);
	}

	const char* text = parser->token_start;
	const int text_len = (int)(parser->current_char - text);

	// Look the text up in the keyword table (terminated by a NULL identifier).
	TokenType kind = TK_NAME;
	for (const _Keyword* kw = _keywords; kw->identifier != NULL; kw++) {
		if (kw->length != text_len) continue;
		if (strncmp(text, kw->identifier, text_len) != 0) continue;
		kind = kw->tk_type;
		break;
	}

	setNextToken(parser, kind);
}

// Complete lexing a number literal. The first digit was consumed by the
// caller. The literal is an integer part optionally followed by '.' and a
// fraction part; its value is set on the next TK_NUMBER token. If the value
// is out of range an error is reported and the value falls back to 0.
static void eatNumber(Parser* parser) {

	// TODO: hex, binary and scientific literals.

	while (utilIsDigit(peekChar(parser)))
		eatChar(parser);

	// A '.' belongs to the number only if a digit follows it. Otherwise it's
	// left in the source so that "1..5" is lexed as 1, TK_DOTDOT, 5 and not as
	// "1." followed by a stray TK_DOT.
	if (peekChar(parser) == '.' && utilIsDigit(peekNextChar(parser))) {
		eatChar(parser); // The '.'.
		while (utilIsDigit(peekChar(parser)))
			eatChar(parser);
	}

	// NOTE(review): strtod() is given the start of the token with no length
	// limit, so it may read further than the chars consumed above (ex: an
	// exponent suffix). Confirm whether that matters once the TODO is done.
	errno = 0;
	Var value = VAR_NUM(strtod(parser->token_start, NULL));
	if (errno == ERANGE) {
		const char* start = parser->token_start;
		int len = (int)(parser->current_char - start);
		lexError(parser, "Literal is too large (%.*s)", len, start);
		value = VAR_NUM(0);
	}

	setNextValueToken(parser, TK_NUMBER, value);
}

// Read and ignore chars till it reaches a new line or EOF. The new line char
// itself is consumed, but the terminating null byte is not: the cursor must
// stay on it because the lexer relies on it to detect the end of the source.
static void skipLineComment(Parser* parser) {
	for (;;) {
		char c = peekChar(parser);

		// Never step over the terminator, otherwise the following reads would
		// be past the end of the source.
		if (c == '\0') return;

		eatChar(parser);
		if (c == '\n') return;
	}
}

// Skip any run of new line tokens. It's a thin wrapper over matchLine() for
// callers that don't care whether there were any.
static void skipNewLines(Parser* parser) {
	(void)matchLine(parser);
}

// If the current char is [c], consume it and return true. Otherwise leave
// the cursor where it is and return false.
static bool matchChar(Parser* parser, char c) {
	if (peekChar(parser) == c) {
		eatChar(parser);
		return true;
	}
	return false;
}

// Helper for tokens that may be one or two chars long (ex: '=' and "==").
// If the current char is [c] it's consumed and the next token becomes [two],
// otherwise nothing is consumed and the next token becomes [one].
static void setNextTwoCharToken(Parser* parser, char c, TokenType one,
	TokenType two) {
	const TokenType chosen = matchChar(parser, c) ? two : one;
	setNextToken(parser, chosen);
}

// Fill in [parser->next] as a token of the given type, spanning the source
// text from [token_start] up to the cursor. The token's value is not touched.
static void setNextToken(Parser* parser, TokenType type) {
	Token* tok = &parser->next;

	tok->type = type;
	tok->start = parser->token_start;
	tok->length = (int)(parser->current_char - parser->token_start);

	// Consuming the '\n' has already bumped the line counter, but the new line
	// token itself belongs to the line it terminates.
	tok->line = parser->current_line;
	if (type == TK_LINE) tok->line -= 1;
}

// Same as setNextToken() but for literal tokens: it also stores the literal's
// [value] on the token.
static void setNextValueToken(Parser* parser, TokenType type, Var value) {
	parser->next.value = value;
	setNextToken(parser, type);
}

// Advance the token window by one: [previous] becomes the old [current],
// [current] becomes the old [next] and a new token is lexed from the source
// into [next]. Once [current] is TK_EOF nothing more is lexed.
static void lexToken(Parser* parser) {
	parser->previous = parser->current;
	parser->current = parser->next;

	if (parser->current.type == TK_EOF) return;

	while (peekChar(parser) != '\0') {
		parser->token_start = parser->current_char;
		char c = eatChar(parser);

		switch (c) {
			case ',': setNextToken(parser, TK_COMMA); return;
			case ':': setNextToken(parser, TK_COLLON); return;
			case ';': setNextToken(parser, TK_SEMICOLLON); return;
			case '#': setNextToken(parser, TK_HASH); return;
			case '(': setNextToken(parser, TK_LPARAN); return;
			case ')': setNextToken(parser, TK_RPARAN); return;
			case '[': setNextToken(parser, TK_LBRACKET); return;
			case ']': setNextToken(parser, TK_RBRACKET); return;
			case '{': setNextToken(parser, TK_LBRACE); return;
			case '}': setNextToken(parser, TK_RBRACE); return;
			case '%': setNextToken(parser, TK_PERCENT); return;

			case '~': setNextToken(parser, TK_TILD); return;
			case '&': setNextToken(parser, TK_AMP); return;
			case '|': setNextToken(parser, TK_PIPE); return;
			case '^': setNextToken(parser, TK_CARET); return;

			case '\n': setNextToken(parser, TK_LINE); return;

			// Whitespace produces no token: skip the whole run and go around the
			// loop again to lex whatever follows it.
			case ' ':
			case '\t':
			case '\r': {
				char ws = peekChar(parser);
				while (ws == ' ' || ws == '\t' || ws == '\r') {
					eatChar(parser);
					ws = peekChar(parser);
				}
				break;
			}

			case '.': // TODO: ".5" should be a valid number.
				setNextTwoCharToken(parser, '.', TK_DOT, TK_DOTDOT);
				return;

			case '=':
				setNextTwoCharToken(parser, '=', TK_EQ, TK_EQEQ);
				return;

			case '!':
				setNextTwoCharToken(parser, '=', TK_NOT, TK_NOTEQ);
				return;

			case '>':
				if (matchChar(parser, '>'))
					setNextToken(parser, TK_SRIGHT);
				else
					setNextTwoCharToken(parser, '=', TK_GT, TK_GTEQ);
				return;

			case '<':
				if (matchChar(parser, '<'))
					setNextToken(parser, TK_SLEFT);
				else
					setNextTwoCharToken(parser, '=', TK_LT, TK_LTEQ);
				return;

			case '+':
				setNextTwoCharToken(parser, '=', TK_PLUS, TK_PLUSEQ);
				return;

			case '-':
				setNextTwoCharToken(parser, '=', TK_MINUS, TK_MINUSEQ);
				return;

			case '*':
				setNextTwoCharToken(parser, '=', TK_STAR, TK_STAREQ);
				return;

			case '/':
				setNextTwoCharToken(parser, '=', TK_FSLASH, TK_DIVEQ);
				return;

			case '"': eatString(parser); return;

			default: {

				if (utilIsDigit(c)) {
					eatNumber(parser);
				} else if (utilIsName(c)) {
					eatName(parser);
				} else {
					if (c >= 32 && c <= 126) {
						lexError(parser, "Invalid character %c", c);
					} else {
						lexError(parser, "Invalid byte 0x%x", (uint8_t)c);
					}
					setNextToken(parser, TK_ERROR);
				}
				return;
			}
		}
	}

	// End of the source. Move the token start onto the terminator first, so
	// that the EOF token is an empty span at the end of the source instead of
	// covering whatever (ex: trailing whitespace) the last iteration started
	// on.
	parser->token_start = parser->current_char;
	setNextToken(parser, TK_EOF);
}

/*****************************************************************************
 * PARSING                                                                   *
 *****************************************************************************/

 // Initialize the parser.
static void parserInit(Parser* self, MSVM* vm, const char* source,
                       const char* path) {
	self->vm = vm;
	self->source = source;
	self->path = path;

	// The cursor starts at the beginning of the source, on line 1.
	self->token_start = source;
	self->current_char = source;
	self->current_line = 1;
	self->has_errors = false;

	// No token has been lexed yet: [next] is a placeholder.
	self->next.type = TK_ERROR;
	self->next.start = NULL;
	self->next.length = 0;
	self->next.line = 1;
	self->next.value = VAR_UNDEFINED;

	// lexToken() copies [current] into [previous] and [next] into [current],
	// and parseError() reads [previous], so those two must hold defined values
	// too before the first token is lexed.
	self->current = self->next;
	self->previous = self->next;
}

// Returns the type of the current token (the one that'll be consumed next).
static TokenType peek(Parser* self) {
	const Token* tok = &self->current;
	return tok->type;
}

// Returns the type of the token after the current one.
static TokenType peekNext(Parser* self) {
	const Token* tok = &self->next;
	return tok->type;
}

// Skip any new lines, then if the current token is [expected] consume it
// (lexing the following token) and return true. Otherwise return false and
// consume nothing more. Since new lines are skipped first, matching TK_LINE
// is invalid.
static bool match(Parser* self, TokenType expected) {
	ASSERT(expected != TK_LINE, "Can't match TK_LINE.");
	matchLine(self);

	if (peek(self) == expected) {
		lexToken(self);
		return true;
	}
	return false;
}

// Consume every consecutive new line token. Returns true if at least one was
// consumed.
static bool matchLine(Parser* parser) {
	bool any = false;
	while (peek(parser) == TK_LINE) {
		lexToken(parser);
		any = true;
	}
	return any;
}

// Consume the end of a statement: an optional semicolon followed by any
// number of new lines. Reports an error if neither was found, unless the
// source ends here.
static void consumeEndStatement(Parser* parser) {

	// The semicolon must be on the same line, so it's checked before any new
	// line is skipped.
	const bool had_semicolon = (peek(parser) == TK_SEMICOLLON);
	if (had_semicolon) match(parser, TK_SEMICOLLON);

	const bool had_newline = matchLine(parser);
	if (had_semicolon || had_newline) return;

	if (peek(parser) != TK_EOF) {
		parseError(parser, "Expected statement end with newline or ';'.");
	}
}

// Consume the start of a block: an optional "do" keyword followed by any
// number of new lines. Reports an error if neither was found.
static void consumeStartBlock(Parser* parser) {

	// "do" must be on the same line, so it's checked before any new line is
	// skipped.
	const bool had_do = (peek(parser) == TK_DO);
	if (had_do) match(parser, TK_DO);

	const bool had_newline = matchLine(parser);
	if (had_do || had_newline) return;

	parseError(parser, "Expected enter block with newline or 'do'.");
}

// Skip any new lines and consume the current token unconditionally. If it
// wasn't [expected], [err_msg] is reported and parsing continues so that more
// errors can be found. Since new lines are skipped first, consuming TK_LINE
// is invalid.
static void consume(Parser* self, TokenType expected, const char* err_msg) {
	ASSERT(expected != TK_LINE, "Can't match TK_LINE.");
	matchLine(self);

	lexToken(self);
	if (self->previous.type == expected) return;

	parseError(self, "%s", err_msg);

	// If the token right after the unexpected one is the expected one, discard
	// the unexpected token to minimize cascaded errors and continue parsing.
	if (peek(self) == expected) lexToken(self);
}

/*****************************************************************************
 * PARSING GRAMMAR                                                           *
 *****************************************************************************/

// Forward declarations of the codegen functions used by the grammar
// functions below.
static void emitOpcode(Compiler* compiler, Opcode opcode);
static int emitByte(Compiler* compiler, int byte);
static int emitShort(Compiler* compiler, int arg);
static int compilerAddConstant(Compiler* compiler, Var value);

// Forward declarations of the grammar functions referenced by the rule table.
static void parsePrecedence(Compiler* compiler, Precedence precedence);

static void compileExpression(Compiler* compiler);
static void exprAssignment(Compiler* compiler, bool can_assign);

// Used by the rule table for true/false, number and string literals and for
// the type name tokens (Bool, Num, String, ...).
static void exprLiteral(Compiler* compiler, bool can_assign);
static void exprName(Compiler* compiler, bool can_assign);

static void exprBinaryOp(Compiler* compiler, bool can_assign);
static void exprUnaryOp(Compiler* compiler, bool can_assign);

static void exprGrouping(Compiler* compiler, bool can_assign);
static void exprArray(Compiler* compiler, bool can_assign);
static void exprMap(Compiler* compiler, bool can_assign);

static void exprCall(Compiler* compiler, bool can_assign);
static void exprAttrib(Compiler* compiler, bool can_assign);
static void exprSubscript(Compiler* compiler, bool can_assign);

// A row for tokens that can neither start nor continue an expression.
#define NO_RULE { NULL,          NULL,          PREC_NONE }
// Precedence of tokens that aren't infix operators.
#define NO_INFIX PREC_NONE

// The grammar rule of every token type. getRule() indexes this table with
// the TokenType value, so there must be exactly one row per enumerator, in
// the same order as they're declared in the enum.
//
// A row with a precedence other than PREC_NONE is treated as an infix
// operator by parsePrecedence(), which then calls its infix function.
GrammarRule rules[] = {  // Prefix       Infix             Infix Precedence
	/* TK_ERROR      */   NO_RULE,
	/* TK_EOF        */   NO_RULE,
	/* TK_LINE       */   NO_RULE,
	// '.' follows the expression it's applied to (a.b), so attribute access
	// is an infix rule.
	/* TK_DOT        */ { NULL,          exprAttrib,       PREC_ATTRIB },
	/* TK_DOTDOT     */ { NULL,          exprBinaryOp,     PREC_RANGE },
	/* TK_COMMA      */   NO_RULE,
	/* TK_COLLON     */   NO_RULE,
	/* TK_SEMICOLLON */   NO_RULE,
	/* TK_HASH       */   NO_RULE,
	/* TK_LPARAN     */ { exprGrouping,  exprCall,         PREC_CALL },
	/* TK_RPARAN     */   NO_RULE,
	/* TK_LBRACKET   */ { exprArray,     exprSubscript,    PREC_SUBSCRIPT },
	/* TK_RBRACKET   */   NO_RULE,
	/* TK_LBRACE     */ { exprMap,       NULL,             NO_INFIX },
	/* TK_RBRACE     */   NO_RULE,
	/* TK_PERCENT    */ { NULL,          exprBinaryOp,     PREC_FACTOR },
	/* TK_TILD       */ { exprUnaryOp,   NULL,             NO_INFIX },
	/* TK_AMP        */ { NULL,          exprBinaryOp,     PREC_BITWISE_AND },
	/* TK_PIPE       */ { NULL,          exprBinaryOp,     PREC_BITWISE_OR },
	/* TK_CARET      */ { NULL,          exprBinaryOp,     PREC_BITWISE_XOR },
	/* TK_PLUS       */ { NULL,          exprBinaryOp,     PREC_TERM },
	/* TK_MINUS      */ { exprUnaryOp,   exprBinaryOp,     PREC_TERM },
	/* TK_STAR       */ { NULL,          exprBinaryOp,     PREC_FACTOR },
	/* TK_FSLASH     */ { NULL,          exprBinaryOp,     PREC_FACTOR },
	/* TK_BSLASH     */   NO_RULE,
	/* TK_EQ         */ { NULL,          exprAssignment,   PREC_ASSIGNMENT },
	/* TK_GT         */ { NULL,          exprBinaryOp,     PREC_COMPARISION },
	/* TK_LT         */ { NULL,          exprBinaryOp,     PREC_COMPARISION },
	/* TK_EQEQ       */ { NULL,          exprBinaryOp,     PREC_EQUALITY },
	/* TK_NOTEQ      */ { NULL,          exprBinaryOp,     PREC_EQUALITY },
	/* TK_GTEQ       */ { NULL,          exprBinaryOp,     PREC_COMPARISION },
	/* TK_LTEQ       */ { NULL,          exprBinaryOp,     PREC_COMPARISION },
	/* TK_PLUSEQ     */ { NULL,          exprAssignment,   PREC_ASSIGNMENT },
	/* TK_MINUSEQ    */ { NULL,          exprAssignment,   PREC_ASSIGNMENT },
	/* TK_STAREQ     */ { NULL,          exprAssignment,   PREC_ASSIGNMENT },
	/* TK_DIVEQ      */ { NULL,          exprAssignment,   PREC_ASSIGNMENT },
	/* TK_SRIGHT     */ { NULL,          exprBinaryOp,     PREC_BITWISE_SHIFT },
	/* TK_SLEFT      */ { NULL,          exprBinaryOp,     PREC_BITWISE_SHIFT },
	/* TK_IMPORT     */   NO_RULE,
	/* TK_ENUM       */   NO_RULE,
	/* TK_DEF        */   NO_RULE,
	/* TK_NATIVE     */   NO_RULE,
	/* TK_END        */   NO_RULE,
	/* TK_NULL       */   NO_RULE,
	/* TK_SELF       */   NO_RULE,
	/* TK_IS         */ { NULL,          exprBinaryOp,     PREC_IS },
	/* TK_IN         */ { NULL,          exprBinaryOp,     PREC_IN },
	/* TK_AND        */ { NULL,          exprBinaryOp,     PREC_LOGICAL_AND },
	/* TK_OR         */ { NULL,          exprBinaryOp,     PREC_LOGICAL_OR },
	// NOTE(review): this row has a precedence but no infix function, so a
	// 'not' in infix position reaches parsePrecedence() with a NULL infix.
	/* TK_NOT        */ { exprUnaryOp,   NULL,             PREC_LOGICAL_NOT },
	/* TK_TRUE       */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_FALSE      */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_BOOL_T     */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_NUM_T      */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_STRING_T   */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_ARRAY_T    */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_MAP_T      */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_RANGE_T    */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_FUNC_T     */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_OBJ_T      */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_DO         */   NO_RULE,
	/* TK_WHILE      */   NO_RULE,
	/* TK_FOR        */   NO_RULE,
	/* TK_IF         */   NO_RULE,
	/* TK_ELIF       */   NO_RULE,
	/* TK_ELSE       */   NO_RULE,
	/* TK_BREAK      */   NO_RULE,
	/* TK_CONTINUE   */   NO_RULE,
	/* TK_RETURN     */   NO_RULE,
	/* TK_NAME       */ { exprName,      NULL,             NO_INFIX },
	/* TK_NUMBER     */ { exprLiteral,   NULL,             NO_INFIX },
	/* TK_STRING     */ { exprLiteral,   NULL,             NO_INFIX },
};

// Returns the grammar rule of the given token type. The token type's value is
// used directly as the index into the rule table.
static GrammarRule* getRule(TokenType type) {
	const int row = (int)type;
	return rules + row;
}

// Infix rule of '=' and the compound assignment operators. Not implemented
// yet: it only trips an assertion.
static void exprAssignment(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}

// Prefix rule of literal tokens. Registers the value of the just consumed
// token as a constant of the script and emits OP_CONSTANT followed by the
// constant's index as a short.
//
// NOTE(review): the rule table also routes keyword tokens (ex: TK_TRUE) here,
// but those are produced by setNextToken(), which doesn't assign a value to
// the token - confirm what value they're expected to carry.
static void exprLiteral(Compiler* compiler, bool can_assign) {
	Var literal = compiler->parser.previous.value;
	int const_index = compilerAddConstant(compiler, literal);

	emitOpcode(compiler, OP_CONSTANT);
	emitShort(compiler, const_index);
}

// Prefix rule of TK_NAME (identifiers). Not implemented yet: it only trips an
// assertion.
static void exprName(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}

// Infix rule of the binary operators. The operator token has just been
// consumed and the code of its left operand has already been emitted. This
// compiles the right operand and then emits the opcode of the operator.
static void exprBinaryOp(Compiler* compiler, bool can_assign) {
	const TokenType operator_tk = compiler->parser.previous.type;
	skipNewLines(&compiler->parser);

	// The right operand is parsed one level above the operator's own
	// precedence, so an operator of the same precedence that follows isn't
	// swallowed by it.
	const Precedence rhs_prec =
		(Precedence)(getRule(operator_tk)->precedence + 1);
	parsePrecedence(compiler, rhs_prec);

	Opcode opcode;
	switch (operator_tk) {
		case TK_DOTDOT:  opcode = OP_RANGE;      break;
		case TK_PERCENT: opcode = OP_MOD;        break;
		case TK_AMP:     opcode = OP_BIT_AND;    break;
		case TK_PIPE:    opcode = OP_BIT_OR;     break;
		case TK_CARET:   opcode = OP_BIT_XOR;    break;
		case TK_PLUS:    opcode = OP_ADD;        break;
		case TK_MINUS:   opcode = OP_SUBTRACT;   break;
		case TK_STAR:    opcode = OP_MULTIPLY;   break;
		case TK_FSLASH:  opcode = OP_DIVIDE;     break;
		case TK_GT:      opcode = OP_GT;         break;
		case TK_LT:      opcode = OP_LT;         break;
		case TK_EQEQ:    opcode = OP_EQEQ;       break;
		case TK_NOTEQ:   opcode = OP_NOTEQ;      break;
		case TK_GTEQ:    opcode = OP_GTEQ;       break;
		case TK_LTEQ:    opcode = OP_LTEQ;       break;
		case TK_SRIGHT:  opcode = OP_BIT_RSHIFT; break;
		case TK_SLEFT:   opcode = OP_BIT_LSHIFT; break;
		case TK_IS:      opcode = OP_IS;         break;
		case TK_IN:      opcode = OP_IN;         break;
		case TK_AND:     opcode = OP_AND;        break;
		case TK_OR:      opcode = OP_OR;         break;
		default:
			UNREACHABLE();
			return;
	}
	emitOpcode(compiler, opcode);
}

// Prefix rule of the unary operators. The operator token has just been
// consumed. This compiles the operand and then emits the opcode of the
// operator.
static void exprUnaryOp(Compiler* compiler, bool can_assign) {
	const TokenType operator_tk = compiler->parser.previous.type;
	skipNewLines(&compiler->parser);

	// The operand is parsed one level above PREC_UNARY.
	const Precedence operand_prec = (Precedence)(PREC_UNARY + 1);
	parsePrecedence(compiler, operand_prec);

	Opcode opcode;
	switch (operator_tk) {
		case TK_TILD:  opcode = OP_BIT_NOT;  break;
		case TK_MINUS: opcode = OP_NEGATIVE; break;
		case TK_NOT:   opcode = OP_NOT;      break;
		default:
			UNREACHABLE();
			return;
	}
	emitOpcode(compiler, opcode);
}

// Prefix rule of '(': a parenthesized expression. The '(' has just been
// consumed; this compiles the inner expression and expects the closing ')'.
static void exprGrouping(Compiler* compiler, bool can_assign) {
	Parser* parser = &compiler->parser;

	compileExpression(compiler);
	consume(parser, TK_RPARAN, "Expected ')' after expression ");
}

// Prefix rule of '[' (array literal). Not implemented yet: it only trips an
// assertion.
static void exprArray(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}
// Prefix rule of '{' (map literal). Not implemented yet: it only trips an
// assertion.
static void exprMap(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}

// Infix rule of '(' (call). Not implemented yet: it only trips an assertion.
static void exprCall(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}
// Rule of '.' (attribute access). Not implemented yet: it only trips an
// assertion.
static void exprAttrib(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}
// Infix rule of '[' (subscript). Not implemented yet: it only trips an
// assertion.
static void exprSubscript(Compiler* compiler, bool can_assign) {
	ASSERT(false, "TODO:");
}

// Core of the precedence parser. Consumes a token and runs its prefix rule,
// then keeps consuming infix operators (and running their infix rules) for
// as long as their precedence is at least [precedence].
//
// Reports an error and returns early if the consumed token can't start an
// expression, or if a token with an infix precedence has no infix function.
static void parsePrecedence(Compiler* compiler, Precedence precedence) {
	Parser* parser = &compiler->parser;

	lexToken(parser);
	GrammarFn prefix = getRule(parser->previous.type)->prefix;

	if (prefix == NULL) {
		parseError(parser, "Expected an expression.");
		return;
	}

	bool can_assign = precedence <= PREC_ASSIGNMENT;
	prefix(compiler, can_assign);

	while (getRule(parser->current.type)->precedence >= precedence) {
		lexToken(parser);
		GrammarFn infix = getRule(parser->previous.type)->infix;

		// Some rows of the rule table have a precedence but no infix function.
		// Report those instead of calling through a NULL pointer.
		if (infix == NULL) {
			parseError(parser, "Unexpected token in expression.");
			return;
		}

		infix(compiler, can_assign);
	}
}

/*****************************************************************************
 * COMPILING                                                                 *
 *****************************************************************************/

// Which scopes a variable search should look in.
typedef enum {
	SCOPE_ANY = -3, //< Match the name in any scope.
	SCOPE_CURRENT,  //< Only match variables declared at the current depth.
} ScopeType;

// What kind of definition an identifier was resolved to.
typedef enum {
	NAME_NOT_DEFINED, //< The identifier wasn't found.
	NAME_LOCAL_VAR,   //< Including parameter.
	NAME_GLOBAL_VAR,
	NAME_FUNCTION,
} NameDefnType;

// Identifier search result.
typedef struct {

	// What the identifier was resolved to.
	NameDefnType type;

	// It could be found in one of the imported scripts, or in their imported
	// scripts recursively. If true, [_extern] will be the script ID.
	bool is_extern;

	// Extern script's ID.
	ID _extern;

	// Index of the definition. Which member is meaningful presumably depends
	// on [type] - TODO confirm once the search is implemented.
	union {
		int local;
		int global;
		int func;
	} index;

	// The line it was declared at.
	int line;

} NameSearchResult;

// Initialize the compiler (and its parser) to compile [source], and register
// it as the vm's current compiler. The compiler starts at the top level
// (depth -1) with no variables, an empty stack, no loop and no function.
//
// [compiler->script] is not touched here; it has to be set by the caller
// before anything that adds constants is compiled.
static void compilerInit(Compiler* compiler, MSVM* vm, const char* source,
                         const char* path) {
	parserInit(&compiler->parser, vm, source, path);
	compiler->vm = vm;
	vm->compiler = compiler;
	compiler->scope_depth = -1;
	compiler->var_count = 0;
	compiler->stack_size = 0;

	// These must be assigned on the compiler itself: declaring locals named
	// after them would leave the compiler's fields uninitialized.
	compiler->loop = NULL;
	compiler->function = NULL;
}

// Search for the name through the compiler's variables and return the index
// of the first match (lowest index), or -1 if it's not found. With
// SCOPE_CURRENT only the variables declared at the current scope depth are
// considered.
static int compilerSearchVariables(Compiler* compiler, const char* name,
                                   int length, ScopeType scope) {

	const bool current_only = (scope == SCOPE_CURRENT);

	for (int idx = 0; idx < compiler->var_count; idx++) {
		const Variable* var = &compiler->variables[idx];

		if (current_only && var->depth != compiler->scope_depth) continue;
		if (var->length != length) continue;
		if (strncmp(var->name, name, length) == 0) return idx;
	}

	// TODO: Search in imported scripts globals too. and return NameSearchResult.

	return -1;
}

// Will check if the name is already defined. The search itself isn't
// implemented yet: it always reports NAME_NOT_DEFINED.
static NameSearchResult compilerSearchName(Compiler* compiler,
                                           const char* name, int length) {
	// TODO:

	// Only the first member is given explicitly; the rest of the struct is
	// zero initialized, so the caller never receives indeterminate fields.
	NameSearchResult result = { NAME_NOT_DEFINED };
	return result;
}

// Add a variable to the context at the current scope depth and return its
// index. Assumes that the variable name is unique and not defined before in
// the current scope.
//
// If the context already holds MAX_VARIABLES variables nothing is added: an
// error is reported and the index of the last variable is returned, which
// mirrors what compilerAddConstant() does when the constants are full.
static int compilerAddVariable(Compiler* compiler, const char* name,
                                int length, int line) {
	if (compiler->var_count >= MAX_VARIABLES) {
		parseError(&compiler->parser, "A context should contain at most %d "
			"variables.", MAX_VARIABLES);
		return compiler->var_count - 1;
	}

	Variable* variable = &compiler->variables[compiler->var_count];
	variable->name = name;
	variable->length = length;
	variable->depth = compiler->scope_depth;
	variable->line = line;
	return compiler->var_count++;
}

// Add a literal constant to the script's literals and return its index. If
// the same value is already there, its existing index is returned instead.
// If the literals are full (MAX_CONSTANTS) an error is reported, nothing is
// added and the index of the last literal is returned.
static int compilerAddConstant(Compiler* compiler, Var value) {
	VarBuffer* pool = &compiler->script->literals;

	// Reuse an existing entry when possible.
	int i = 0;
	while (i < pool->count) {
		if (isVauesSame(pool->data[i], value)) return i;
		i++;
	}

	if (pool->count >= MAX_CONSTANTS) {
		parseError(&compiler->parser, "A script should contain at most %d "
			"unique constants.", MAX_CONSTANTS);
	} else {
		// Add new constant to script.
		varBufferWrite(pool, compiler->vm, value);
	}
	return (int)pool->count - 1;
}

// Enter a block: variables added from now on belong to one scope deeper.
static void compilerEnterBlock(Compiler* compiler) {
	compiler->scope_depth += 1;
}

// Exit the current block: drop every variable that was declared at the
// current depth (or deeper), shrinking the tracked stack size by one for
// each, and step one scope out.
static void compilerExitBlock(Compiler* compiler) {
	ASSERT(compiler->scope_depth > -1, "Cannot exit toplevel.");

	// The count has to be checked first: once every variable is dropped there
	// is no "last variable" left to look at.
	while (compiler->var_count > 0 &&
		compiler->variables[compiler->var_count - 1].depth >=
		compiler->scope_depth) {
		compiler->var_count--;
		compiler->stack_size--;
	}
	compiler->scope_depth--;
}

/*****************************************************************************
 * COMPILING (EMIT BYTECODE)                                                 *
 *****************************************************************************/

// Append one byte to the opcode stream of the function being compiled and
// record the line of the parser's previous token for it in the parallel
// line buffer. Only the low 8 bits of [byte] are kept. Returns the index of
// the byte that was just written.
static int emitByte(Compiler* compiler, int byte) {
	const uint8_t value = (uint8_t)byte;

	byteBufferWrite(&compiler->function->fn->opcodes, compiler->vm, value);
	intBufferWrite(&compiler->function->fn->oplines, compiler->vm,
	               compiler->parser.previous.line);

	return (int)compiler->function->fn->opcodes.count - 1;
}

// Emit a 2 byte argument in big endian order (high byte first) and return the
// index of its first byte.
static int emitShort(Compiler* compiler, int arg) {
	const int high = (arg >> 8) & 0xff;
	const int low = arg & 0xff;

	const int start = emitByte(compiler, high);
	emitByte(compiler, low);
	return start;
}

// Emit an instruction and apply its stack effect (taken from opcode_info) to
// the compiler's running stack size, raising the function's recorded maximum
// stack size if the new value exceeds it. Opcodes whose stack effect is
// variable have to be accounted for by the caller.
static void emitOpcode(Compiler* compiler, Opcode opcode) {
	emitByte(compiler, (int)opcode);

	compiler->stack_size += opcode_info[opcode].stack;

	// Keep the high-water mark on the function up to date.
	if (compiler->function->fn->stack_size < compiler->stack_size) {
		compiler->function->fn->stack_size = compiler->stack_size;
	}
}

// Emit OP_CONSTANT followed by the 2 byte index of [value] in the script's
// literals. The literal is added to the script first if it isn't there yet.
static void emitConstant(Compiler* compiler, Var value) {
	const int literal_index = compilerAddConstant(compiler, value);

	emitOpcode(compiler, OP_CONSTANT);
	emitShort(compiler, literal_index);
}

// Overwrite the 2 byte (big endian) jump operand starting at [addr_index] so
// that it targets the current end of the opcode stream, i.e. the address of
// the next instruction that will be emitted.
static void patchJump(Compiler* compiler, int addr_index) {
	const int target = (int)compiler->function->fn->opcodes.count;
	ASSERT(target < MAX_JUMP, "Too large address to jump.");

	const int high = (target >> 8) & 0xff;
	const int low = target & 0xff;

	compiler->function->fn->opcodes.data[addr_index] = high;
	compiler->function->fn->opcodes.data[addr_index + 1] = low;
}

 /*****************************************************************************
  * COMPILING (PARSE TOPLEVEL)                                                *
  *****************************************************************************/

static void compileStatement(Compiler* compiler);
static void compileBlockBody(Compiler* compiler, bool if_body);

// Compile a function declaration. The 'def' / 'native' keyword has already
// been consumed by the caller. The function's name is registered in the
// script's function name table and a new Function is appended to the script's
// functions. For a native function ([is_native] true) only the signature is
// parsed; otherwise the body is compiled into the new function.
//
// On return compiler->function always points back at the script body so that
// statements following the declaration are emitted there.
static void compileFunction(Compiler* compiler, bool is_native) {

	Parser* parser = &compiler->parser;

	consume(parser, TK_NAME, "Expected a function name.");

	const char* name_start = parser->previous.start;
	int name_length = parser->previous.length;
	NameSearchResult result = compilerSearchName(compiler, name_start,
		name_length);

	if (result.type != NAME_NOT_DEFINED) {
		// TODO: multiple definition error(); or allow name overriden.
	}

	int index = nameTableAdd(&compiler->script->function_names, compiler->vm,
		name_start, name_length);

	Function* func = newFunction(compiler->vm, nameTableGet(
		&compiler->script->function_names, index), compiler->script, is_native);

	// Keep the new function referenced by the VM while it's being written into
	// the script's function buffer.
	vmPushTempRef(compiler->vm, &func->_super);
	functionBufferWrite(&compiler->script->functions, compiler->vm, func);
	vmPopTempRef(compiler->vm);

	compiler->function = func;

	consume(parser, TK_LPARAN, "Expected '(' after function name.");

	compiler->scope_depth++; // Parameter scope.

	// Compile parameter list.
	// NOTE(review): parameters are only looked up here, never added with
	// compilerAddVariable(), so the duplicate check below cannot currently
	// trigger for two parameters of the same function -- confirm whether
	// registering them is still pending.
	while (match(parser, TK_NAME)) {
		int predef = compilerSearchVariables(compiler, parser->previous.start,
			parser->previous.length, SCOPE_CURRENT);
		if (predef != -1) {
			parseError(parser, "Multiple definition of a parameter");
		}
		match(parser, TK_COMMA);
	}

	consume(parser, TK_RPARAN, "Expected ')' after parameters end.");
	consumeEndStatement(parser);

	// A native function has no body to compile.
	if (!is_native) {
		compileBlockBody(compiler, false);
	}

	compiler->scope_depth--; // Parameter scope.

	// Restore the emit target for both kinds of function. Leaving it NULL after
	// a native declaration would make the next top level statement dereference
	// a NULL function when emitting bytecode.
	compiler->function = compiler->script->body;
}

// Compile the statements of a block inside their own scope. Compilation stops
// (without consuming the token) when the next token is TK_END or TK_EOF, and,
// if [if_body] is true, also when it is TK_ELSE or TK_ELIF.
static void compileBlockBody(Compiler* compiler, bool if_body) {
	compilerEnterBlock(compiler);

	for (;;) {
		TokenType upcoming = peek(&compiler->parser);

		if (upcoming == TK_END || upcoming == TK_EOF) break;
		if (if_body && (upcoming == TK_ELSE || upcoming == TK_ELIF)) break;

		compileStatement(compiler);
	}

	compilerExitBlock(compiler);
}

// Compiles an expression. An expression will result in a value on top of the
// stack. Parsing starts from the lowest precedence level (PREC_LOWEST).
static void compileExpression(Compiler* compiler) {
	parsePrecedence(compiler, PREC_LOWEST);
}

// Compile an if statement; the 'if' (or 'elif') keyword has already been
// consumed. Emitted layout:
//
//     <condition>
//     OP_JUMP_IF_NOT  -> alternative
//     <then body>
//     OP_JUMP         -> exit          (only if an elif / else follows)
//   alternative:
//     <elif statement | else body>
//   exit:
//
// An 'elif' is compiled as a nested if statement in the alternative position.
static void compileIfStatement(Compiler* compiler) {
	Parser* parser = &compiler->parser;

	compileExpression(compiler); //< Condition.
	emitOpcode(compiler, OP_JUMP_IF_NOT);
	// Jump operands are 2 bytes wide (patchJump() writes 2 bytes), so a full
	// short has to be reserved for the placeholder.
	int ifpatch = emitShort(compiler, 0xffff); //< Will be patched.

	consumeStartBlock(parser);

	compileBlockBody(compiler, true);

	bool is_elif = match(parser, TK_ELIF);
	if (is_elif || match(parser, TK_ELSE)) {
		// When the then body was executed it must skip the alternative.
		emitOpcode(compiler, OP_JUMP);
		int exitpatch = emitShort(compiler, 0xffff); //< Will be patched.

		// A false condition lands here, at the start of the alternative.
		patchJump(compiler, ifpatch);

		if (is_elif) {
			// Compiles the elif condition, its body and any further elif / else.
			compileIfStatement(compiler);
		} else {
			compileBlockBody(compiler, false);
		}

		patchJump(compiler, exitpatch);

	} else {
		patchJump(compiler, ifpatch);
	}
}

// Compile a while loop; the 'while' keyword has already been consumed. A Loop
// record is pushed for the duration of the loop so that 'break' and 'continue'
// statements in the body can find the loop start and register jumps to patch.
static void compileWhileStatement(Compiler* compiler) {
	Loop loop;
	loop.start = (int)compiler->function->fn->opcodes.count;
	loop.patch_count = 0;
	loop.outer_loop = compiler->loop;
	compiler->loop = &loop;

	compileExpression(compiler); //< Condition.
	emitOpcode(compiler, OP_JUMP_IF_NOT);
	// Jump operands are 2 bytes wide (patchJump() writes 2 bytes), so a full
	// short has to be reserved for the placeholder.
	int whilepatch = emitShort(compiler, 0xffff); //< Will be patched.

	compileBlockBody(compiler, false);

	// Jump back to re-evaluate the condition.
	emitOpcode(compiler, OP_JUMP);
	emitShort(compiler, loop.start);

	// A false condition exits to just after the backward jump.
	patchJump(compiler, whilepatch);

	// Patch break statement.
	for (int i = 0; i < compiler->loop->patch_count; i++) {
		patchJump(compiler, compiler->loop->patches[i]);
	}

	compiler->loop = loop.outer_loop;
}

// Compile a for statement. Not implemented yet: reaching this function only
// triggers the assertion below.
static void compileForStatement(Compiler* compiler) {
	ASSERT(false, "TODO:");
}

// Compiles a single statement: break, continue, return, if, while, for, or
// otherwise an expression statement whose value is popped after evaluation.
static void compileStatement(Compiler* compiler) {
	Parser* parser = &compiler->parser;
	if (match(parser, TK_BREAK)) {
		if (compiler->loop == NULL) {
			parseError(parser, "Cannot use 'break' outside a loop.");
			return;
		}

		// The number of breaks depends on the script being compiled, so report
		// it as a compile error rather than asserting; this also keeps the
		// write into loop->patches below in bounds.
		if (compiler->loop->patch_count >= MAX_BREAK_PATCH) {
			parseError(parser, "A loop should contain at most %d break "
				"statements.", MAX_BREAK_PATCH);
			return;
		}

		emitOpcode(compiler, OP_JUMP);
		// Jump operands are 2 bytes wide (patchJump() writes 2 bytes), so a full
		// short has to be reserved for the placeholder.
		int patch = emitShort(compiler, 0xffff); //< Will be patched.
		compiler->loop->patches[compiler->loop->patch_count++] = patch;

	} else if (match(parser, TK_CONTINUE)) {
		if (compiler->loop == NULL) {
			parseError(parser, "Cannot use 'continue' outside a loop.");
			return;
		}

		// Jump straight back to the start of the innermost loop.
		emitOpcode(compiler, OP_JUMP);
		emitShort(compiler, compiler->loop->start);

	} else if (match(parser, TK_RETURN)) {

		if (compiler->scope_depth == -1) {
			parseError(parser, "Invalid 'return' outside a function.");
			return;
		}

		if (peek(parser) == TK_SEMICOLLON || peek(parser) == TK_LINE) {
			// Bare 'return': the function returns null.
			emitOpcode(compiler, OP_PUSH_NULL);
			emitOpcode(compiler, OP_RETURN);
		} else {
			compileExpression(compiler); //< Return value is at stack top.
			emitOpcode(compiler, OP_RETURN);
		}
	} else if (match(parser, TK_IF)) {
		compileIfStatement(compiler);

	} else if (match(parser, TK_WHILE)) {
		compileWhileStatement(compiler);

	} else if (match(parser, TK_FOR)) {
		compileForStatement(compiler);

	} else {
		// Expression statement: evaluate and discard the result.
		compileExpression(compiler);
		emitOpcode(compiler, OP_POP);
	}
}

// Load the source at [path] through the VM's load_script_fn callback and
// compile it into a new Script. Top level 'native' and 'def' declarations are
// compiled as functions, everything else as statements of the script body.
// Once compilation is finished the optional load_script_done_fn callback is
// invoked with the loader's user data.
//
// Returns the compiled script, or NULL if the source could not be loaded (the
// failure is reported through the VM's error_fn callback).
Script* compileSource(MSVM* vm, const char* path) {

	MSLoadScriptResult res = vm->config.load_script_fn(vm, path);
	if (res.is_failed || res.source == NULL) {
		vm->config.error_fn(vm, MS_ERROR_COMPILE, NULL, -1,
			"file load source failed.");
		// There is no source to compile; continuing would read through an
		// invalid source pointer.
		return NULL;
	}
	const char* source = res.source;

	// Skip utf8 BOM if there is any.
	if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;

	Compiler compiler;
	compilerInit(&compiler, vm, source, path);

	Script* script = newScript(vm);
	compiler.script = script;
	compiler.function = script->body;

	// Parser pointer for quick access.
	Parser* parser = &compiler.parser;

	// Lex initial tokens. current <-- next.
	lexToken(parser);
	lexToken(parser);
	skipNewLines(parser);

	while (!match(parser, TK_EOF)) {

		if (match(parser, TK_NATIVE)) {
			compileFunction(&compiler, true);

		} else if (match(parser, TK_DEF)) {
			compileFunction(&compiler, false);

		} else if (match(parser, TK_IMPORT)) {
			// TODO: import statement must be first of all other.
			ASSERT(false, "TODO:");
		} else {
			compileStatement(&compiler);
		}
	}

	// Source done callback.
	if (vm->config.load_script_done_fn != NULL)
		vm->config.load_script_done_fn(vm, path, res.user_data);

	// The compiler lives on this stack frame; clear the VM's pointer before
	// returning.
	vm->compiler = NULL;

	return script;
}