From e382a0430cbe3e22e06f8e5b2c14614434c07648 Mon Sep 17 00:00:00 2001 From: Thakee Nathees Date: Tue, 4 May 2021 15:54:26 +0530 Subject: [PATCH] map methods implemented --- src/compiler.c | 4 +- src/core.c | 8 +- src/debug.c | 6 +- src/types/buffer.template.h | 4 +- src/types/name_table.c | 2 +- src/types/name_table.h | 2 +- src/var.c | 195 +++++++++++++++++++++++++++++++++--- src/var.h | 14 ++- src/vm.c | 2 +- 9 files changed, 205 insertions(+), 32 deletions(-) diff --git a/src/compiler.c b/src/compiler.c index 25b3c81..40a8271 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -1310,8 +1310,8 @@ static int compilerAddVariable(Compiler* compiler, const char* name, static int compilerAddConstant(Compiler* compiler, Var value) { VarBuffer* literals = &compiler->script->literals; - for (int i = 0; i < literals->count; i++) { - if (isVauesSame(literals->data[i], value)) { + for (uint32_t i = 0; i < literals->count; i++) { + if (isValuesSame(literals->data[i], value)) { return i; } } diff --git a/src/core.c b/src/core.c index 8806d32..4b4e081 100644 --- a/src/core.c +++ b/src/core.c @@ -435,7 +435,7 @@ Var varGetAttrib(MSVM* vm, Var on, String* attrib) { Script* scr = (Script*)obj; // Search in functions. - int index = nameTableFind(&scr->function_names, attrib->data, + uint32_t index = nameTableFind(&scr->function_names, attrib->data, attrib->length); if (index != -1) { ASSERT_INDEX(index, scr->functions.count); @@ -501,7 +501,7 @@ do { \ // TODO: check globals HERE. // Check function. - int index = nameTableFind(&scr->function_names, attrib->data, + uint32_t index = nameTableFind(&scr->function_names, attrib->data, attrib->length); if (index != -1) { ASSERT_INDEX(index, scr->functions.count); @@ -644,7 +644,7 @@ bool varIterate(MSVM* vm, Var seq, Var* iterator, Var* value) { Object* obj = AS_OBJ(seq); - int32_t iter = 0; //< Nth iteration. + uint32_t iter = 0; //< Nth iteration. 
if (IS_NUM(*iterator)) { iter = _AS_INTEGER(*iterator); } @@ -653,7 +653,7 @@ bool varIterate(MSVM* vm, Var seq, Var* iterator, Var* value) { case OBJ_STRING: { // TODO: // Need to consider utf8. String* str = ((String*)obj); - if (iter < 0 || iter >= (int)str->length) { + if (iter < 0 || iter >= str->length) { return false; //< Stop iteration. } // TODO: Or I could add char as a type for efficiency. diff --git a/src/debug.c b/src/debug.c index 8cfa06e..45a96c7 100644 --- a/src/debug.c +++ b/src/debug.c @@ -43,7 +43,7 @@ static void _dumpValue(MSVM* vm, Var value, bool recursive) { printf("[...]"); } else { printf("["); - for (int i = 0; i < list->elements.count; i++) { + for (uint32_t i = 0; i < list->elements.count; i++) { if (i != 0) printf(", "); _dumpValue(vm, list->elements.data[i], true); } @@ -82,7 +82,7 @@ void dumpValue(MSVM* vm, Var value) { void dumpInstructions(MSVM* vm, Function* func) { - int i = 0; + uint32_t i = 0; uint8_t* opcodes = func->fn->opcodes.data; int* lines = func->fn->oplines.data; int line = 1, last_line = 0; @@ -115,7 +115,7 @@ void dumpInstructions(MSVM* vm, Function* func) { { int index = READ_SHORT(); printf("%5d ", index); - ASSERT_INDEX(index, func->owner->literals.count); + ASSERT_INDEX((uint32_t)index, func->owner->literals.count); Var value = func->owner->literals.data[index]; dumpValue(vm, value); printf("\n"); diff --git a/src/types/buffer.template.h b/src/types/buffer.template.h index a30327a..0c4afe1 100644 --- a/src/types/buffer.template.h +++ b/src/types/buffer.template.h @@ -19,8 +19,8 @@ typedef uint8_t $type$; typedef struct { $type$* data; - size_t count; - size_t capacity; + uint32_t count; + uint32_t capacity; } $name$Buffer; // Initialize a new buffer int instance. 
diff --git a/src/types/name_table.c b/src/types/name_table.c index 4b7b823..6ab7016 100644 --- a/src/types/name_table.c +++ b/src/types/name_table.c @@ -35,7 +35,7 @@ const char* nameTableGet(NameTable* self, int index) { return self->data[index]->data; } -int nameTableFind(NameTable* self, const char* name, size_t length) { +uint32_t nameTableFind(NameTable* self, const char* name, size_t length) { for (int i = 0; i < self->count; i++) { if (self->data[i]->length == length && diff --git a/src/types/name_table.h b/src/types/name_table.h index ee0e10f..cabb511 100644 --- a/src/types/name_table.h +++ b/src/types/name_table.h @@ -27,6 +27,6 @@ int nameTableAdd(NameTable* self, MSVM* vm, const char* name, size_t length, const char* nameTableGet(NameTable* self, int index); // Find and return the index of the name. If not found returns -1. -int nameTableFind(NameTable* self, const char* name, size_t length); +uint32_t nameTableFind(NameTable* self, const char* name, size_t length); #endif // SYMBOL_TABLE_H diff --git a/src/var.c b/src/var.c index 7353905..1839a71 100644 --- a/src/var.c +++ b/src/var.c @@ -42,7 +42,7 @@ const char* msAsString(MSVM* vm, Var value) { // The maximum percentage of the map entries that can be filled before the map // is grown. A lower percentage reduce collision which makes looks up faster // but take more memory. -#define MAP_FILL_PERCENT (75 / 100) +#define MAP_LOAD_PERCENT (75.0 / 100.0) // The factor a collection would grow by when it's exceeds the current capacity.
// The new capacity will be calculated by multiplying it's old capacity by the @@ -94,7 +94,7 @@ double varToDouble(Var value) { #endif // VAR_NAN_TAGGING } -static String* allocateString(MSVM* vm, size_t length) { +static String* _allocateString(MSVM* vm, size_t length) { String* string = ALLOCATE_DYNAMIC(vm, String, length + 1, char); varInitObject(&string->_super, vm, OBJ_STRING); string->length = (uint32_t)length; @@ -106,9 +106,11 @@ String* newString(MSVM* vm, const char* text, uint32_t length) { ASSERT(length == 0 || text != NULL, "Unexpected NULL string."); - String* string = allocateString(vm, length); + String* string = _allocateString(vm, length); if (length != 0 && text != NULL) memcpy(string->data, text, length); + string->hash = utilHashString(string->data); + return string; } @@ -217,7 +219,7 @@ void listInsert(List* self, MSVM* vm, uint32_t index, Var value) { if (IS_OBJ(value)) vmPopTempRef(vm); // Shift the existing elements down. - for (int i = self->elements.count - 1; i > index; i--) { + for (uint32_t i = self->elements.count - 1; i > index; i--) { self->elements.data[i] = self->elements.data[i - 1]; } @@ -230,7 +232,7 @@ Var listRemoveAt(List* self, MSVM* vm, uint32_t index) { if (IS_OBJ(removed)) vmPushTempRef(vm, AS_OBJ(removed)); // Shift the rest of the elements up. - for (int i = index; i < self->elements.count - 1; i++) { + for (uint32_t i = index; i < self->elements.count - 1; i++) { self->elements.data[i] = self->elements.data[i + 1]; } @@ -289,24 +291,160 @@ static uint32_t _hashVar(Var value) { #endif } +// Find the entry with the [key]. Returns true if found and set [result] to +// point to the entry, return false otherwise and points [result] to where +// the entry should be inserted. +static bool _mapFindEntry(Map* self, Var key, MapEntry** result) { + + // An empty map won't contain the key. 
+ if (self->capacity == 0) return false; + + // The [start_index] is where the entry supposed to be if there wasn't any + // collision occurred. It'll be the start index for the linear probing. + uint32_t start_index = _hashVar(key) % self->capacity; + uint32_t index = start_index; + + // Keep track of the first tombstone after the [start_index] if we don't find + // the key anywhere. The tombstone would be the entry at where we will have + // to insert the key/value pair. + MapEntry* tombstone = NULL; + + do { + MapEntry* entry = &self->entries[index]; + + if (IS_UNDEF(entry->key)) { + ASSERT(IS_BOOL(entry->value), OOPS); + + if (IS_TRUE(entry->value)) { + + // We've found a tombstone, if we haven't found one [tombstone] should + // be updated. We still need to keep searching in case the key exists. + if (tombstone == NULL) tombstone = entry; + + } else { + // We've found a new empty slot and the key isn't found. If we've + // found a tombstone along the sequence we could use that entry + // otherwise the entry at the current index. + + *result = (tombstone != NULL) ? tombstone : entry; + return false; + } + + } else if (isValuesEqual(entry->key, key)) { + // We've found the key. + *result = entry; + return true; + } + + index = (index + 1) % self->capacity; + + } while (index != start_index); + + // If we reach here, the map is filled with tombstones. Set the first + // tombstone as result for the next insertion and return false. + ASSERT(tombstone != NULL, OOPS); + *result = tombstone; + return false; +} + +// Add the key, value pair to the entries array of the map. Returns true if +// the entry is added for the first time and false for a replaced value. +static bool _mapInsertEntry(Map* self, Var key, Var value) { + + ASSERT(self->capacity != 0, "Should ensure the capacity before inserting."); + + MapEntry* result; + if (_mapFindEntry(self, key, &result)) { + // Key already found, just replace the value.
+ result->value = value; + return false; + } else { + result->key = key; + result->value = value; + return true; + } +} + +// Resize the map's size to the given [capacity]. +static void _mapResize(Map* self, MSVM* vm, uint32_t capacity) { + + MapEntry* old_entries = self->entries; + uint32_t old_capacity = self->capacity; + + self->entries = ALLOCATE_ARRAY(vm, MapEntry, capacity); + self->capacity = capacity; + for (uint32_t i = 0; i < capacity; i++) { + self->entries[i].key = VAR_UNDEFINED; + self->entries[i].value = VAR_FALSE; + } + + // Insert the old entries to the new entries. + for (uint32_t i = 0; i < old_capacity; i++) { + // Skip the empty entries or tombstones. + if (IS_UNDEF(old_entries[i].key)) continue; + + _mapInsertEntry(self, old_entries[i].key, old_entries[i].value); + } + + DEALLOCATE(vm, old_entries); +} + Var mapGet(Map* self, Var key) { - TODO; + MapEntry* entry; + if (_mapFindEntry(self, key, &entry)) return entry->value; + return VAR_UNDEFINED; } void mapSet(Map* self, MSVM* vm, Var key, Var value) { - if (self->count + 1 > self->capacity * MAP_FILL_PERCENT) { - TODO; + // If map is about to fill, resize it first. + if (self->count + 1 > self->capacity * MAP_LOAD_PERCENT) { + uint32_t capacity = self->capacity * GROW_FACTOR; + if (capacity < MIN_CAPACITY) capacity = MIN_CAPACITY; + _mapResize(self, vm, capacity); + } + + if (_mapInsertEntry(self, key, value)) { + self->count++; //< A new key added. } - TODO; } void mapClear(Map* self, MSVM* vm) { - TODO; + DEALLOCATE(vm, self->entries); + self->entries = NULL; + self->capacity = 0; + self->count = 0; } Var mapRemoveKey(Map* self, MSVM* vm, Var key) { - TODO; + MapEntry* entry; + if (!_mapFindEntry(self, key, &entry)) return VAR_NULL; + + // Set the key as VAR_UNDEFINED to mark it as an available slot and set its + // value to VAR_TRUE for tombstone.
+ Var value = entry->value; + entry->key = VAR_UNDEFINED; + entry->value = VAR_TRUE; + + self->count--; + + if (IS_OBJ(value)) vmPushTempRef(vm, AS_OBJ(value)); + + if (self->count == 0) { + // Clear the map if it's empty. + mapClear(self, vm); + + } else if (self->capacity > MIN_CAPACITY && + (self->capacity / GROW_FACTOR) * MAP_LOAD_PERCENT > self->count) { + uint32_t capacity = self->capacity / GROW_FACTOR; + if (capacity < MIN_CAPACITY) capacity = MIN_CAPACITY; + + _mapResize(self, vm, capacity); + } + + if (IS_OBJ(value)) vmPopTempRef(vm); + + return value; } void freeObject(MSVM* vm, Object* obj) { @@ -372,7 +510,7 @@ void freeObject(MSVM* vm, Object* obj) { const char* varTypeName(Var v) { if (IS_NULL(v)) return "null"; if (IS_BOOL(v)) return "bool"; - if (IS_NUM(v)) return "number"; + if (IS_NUM(v)) return "number"; ASSERT(IS_OBJ(v), OOPS); Object* obj = AS_OBJ(v); @@ -389,7 +527,7 @@ const char* varTypeName(Var v) { } } -bool isVauesSame(Var v1, Var v2) { +bool isValuesSame(Var v1, Var v2) { #if VAR_NAN_TAGGING // Bit representation of each values are unique so just compare the bits. return v1 == v2; @@ -398,6 +536,32 @@ bool isVauesSame(Var v1, Var v2) { #endif } +bool isValuesEqual(Var v1, Var v2) { + if (isValuesSame(v1, v2)) return true; + + // If we reach here only heap allocated objects could be compared.
+ if (!IS_OBJ(v1) || !IS_OBJ(v2)) return false; + + Object* o1 = AS_OBJ(v1), *o2 = AS_OBJ(v2); + if (o1->type != o2->type) return false; + + switch (o1->type) { + case OBJ_RANGE: + return ((Range*)o1)->from == ((Range*)o2)->from && + ((Range*)o1)->to == ((Range*)o2)->to; + + case OBJ_STRING: { + String* s1 = (String*)o1, *s2 = (String*)o2; + return s1->hash == s2->hash && + s1->length == s2->length && + memcmp(s1->data, s2->data, s1->length) == 0; + } + + default: + return false; + } +} + String* toString(MSVM* vm, Var v, bool recursive) { if (IS_NULL(v)) { @@ -421,7 +585,7 @@ String* toString(MSVM* vm, Var v, bool recursive) { switch (obj->type) { case OBJ_STRING: { - // If recursive return with quotes (ex: [42, "hello", 0..10]) + // If recursive return with quotes (ex: [42, "hello", 0..10]). if (!recursive) return newString(vm, ((String*)obj)->data, ((String*)obj)->length); TODO; //< Add quotes around the string. @@ -500,7 +664,7 @@ Var stringFormat(MSVM* vm, const char* fmt, ...) { va_end(arg_list); // Now build the new string. - String* result = allocateString(vm, total_length); + String* result = _allocateString(vm, total_length); va_start(arg_list, fmt); char* buff = result->data; for (const char* c = fmt; *c != '\0'; c++) { @@ -528,5 +692,6 @@ Var stringFormat(MSVM* vm, const char* fmt, ...) { } va_end(arg_list); + result->hash = utilHashString(result->data); return VAR_OBJ(result); } \ No newline at end of file diff --git a/src/var.h b/src/var.h index e484f6b..10f824e 100644 --- a/src/var.h +++ b/src/var.h @@ -219,6 +219,7 @@ struct Object { struct String { Object _super; + uint32_t hash; //< Hash value of the string. uint32_t length; //< Length of the string in \ref data. uint32_t capacity; //< Size of allocated \ref data. 
char data[DYNAMIC_TAIL_ARRAY]; @@ -231,8 +232,12 @@ struct List { }; typedef struct { + // If the key is VAR_UNDEFINED it's an empty slot and if the value is false + // the entry is new and available, if true it's a tombstone - the entry + // was previously used but then deleted. + Var key; //< The entry's key or VAR_UNDEFINED of the entry is not in use. - Var value; //< The entry's value (TODO: see wren for tombstone). + Var value; //< The entry's value. } MapEntry; struct Map { @@ -403,8 +408,11 @@ void freeObject(MSVM* vm, Object* obj); // Returns the type name of the var [v]. const char* varTypeName(Var v); -// Returns true if both variables are the same. -bool isVauesSame(Var v1, Var v2); +// Returns true if both variables are the same (ie v1 is v2). +bool isValuesSame(Var v1, Var v2); + +// Returns true if both variables are equal (ie v1 == v2). +bool isValuesEqual(Var v1, Var v2); // Returns the string version of the value. Note: pass false as [_recursive] // It's for internal use (or may be I could make a wrapper around). diff --git a/src/vm.c b/src/vm.c index 2ed44b5..5bfa89e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -30,7 +30,7 @@ void* vmRealloc(MSVM* self, void* memory, size_t old_size, size_t new_size) { // TODO: Debug trace allocations here. // Track the total allocated memory of the VM to trigger the GC. - // if vmRealloc is called for freeing the old_size would be 0 since + // if vmRealloc is called for freeing, the old_size would be 0 since // deallocated bytes are traced by garbage collector. self->bytes_allocated += new_size - old_size;