// luau/Common/include/Luau/DenseHash.h

// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#pragma once
#include "Luau/Common.h"

#include <stddef.h>
#include <stdint.h>

#include <functional>
#include <iterator>
#include <new>
#include <tuple>
#include <type_traits>
#include <utility>
namespace Luau
{
// Hash functor for raw pointers: mixes two right-shifted copies of the address
struct DenseHashPointer
{
    size_t operator()(const void* key) const
    {
        const uintptr_t address = uintptr_t(key);
        const uintptr_t shiftedBy4 = address >> 4;
        const uintptr_t shiftedBy9 = address >> 9;

        return shiftedBy4 ^ shiftedBy9;
    }
};
// Internal implementation of DenseHashSet and DenseHashMap
namespace detail
{
template<typename T>
using DenseHashDefault = std::conditional_t<std::is_pointer_v<T>, DenseHashPointer, std::hash<T>>;
template<typename Key, typename Item, typename MutableItem, typename ItemInterface, typename Hash, typename Eq>
class DenseHashTable
{
public:
class const_iterator;
2022-04-15 07:57:43 +08:00
class iterator;
explicit DenseHashTable(const Key& empty_key, size_t buckets = 0)
2022-09-24 03:17:25 +08:00
: data(nullptr)
, capacity(0)
, count(0)
, empty_key(empty_key)
{
2022-09-24 03:17:25 +08:00
// validate that equality operator is at least somewhat functional
LUAU_ASSERT(eq(empty_key, empty_key));
// buckets has to be power-of-two or zero
LUAU_ASSERT((buckets & (buckets - 1)) == 0);
if (buckets)
2022-09-24 03:17:25 +08:00
{
data = static_cast<Item*>(::operator new(sizeof(Item) * buckets));
capacity = buckets;
ItemInterface::fill(data, buckets, empty_key);
}
}
~DenseHashTable()
{
if (data)
destroy();
}
DenseHashTable(const DenseHashTable& other)
: data(nullptr)
, capacity(0)
, count(other.count)
, empty_key(other.empty_key)
{
if (other.capacity)
{
data = static_cast<Item*>(::operator new(sizeof(Item) * other.capacity));
for (size_t i = 0; i < other.capacity; ++i)
{
new (&data[i]) Item(other.data[i]);
capacity = i + 1; // if Item copy throws, capacity will note the number of initialized objects for destroy() to clean up
}
}
}
DenseHashTable(DenseHashTable&& other)
: data(other.data)
, capacity(other.capacity)
, count(other.count)
, empty_key(other.empty_key)
{
other.data = nullptr;
other.capacity = 0;
other.count = 0;
}
DenseHashTable& operator=(DenseHashTable&& other)
{
if (this != &other)
{
if (data)
destroy();
data = other.data;
capacity = other.capacity;
count = other.count;
empty_key = other.empty_key;
other.data = nullptr;
other.capacity = 0;
other.count = 0;
}
return *this;
}
DenseHashTable& operator=(const DenseHashTable& other)
{
if (this != &other)
{
DenseHashTable copy(other);
*this = std::move(copy);
}
return *this;
}
void clear()
{
2022-09-24 03:17:25 +08:00
if (count == 0)
return;
if (capacity > 32)
{
destroy();
}
else
{
ItemInterface::destroy(data, capacity);
ItemInterface::fill(data, capacity, empty_key);
}
count = 0;
}
2022-09-24 03:17:25 +08:00
void destroy()
{
ItemInterface::destroy(data, capacity);
::operator delete(data);
data = nullptr;
capacity = 0;
}
Item* insert_unsafe(const Key& key)
{
// It is invalid to insert empty_key into the table since it acts as a "entry does not exist" marker
LUAU_ASSERT(!eq(key, empty_key));
2022-09-24 03:17:25 +08:00
size_t hashmod = capacity - 1;
size_t bucket = hasher(key) & hashmod;
for (size_t probe = 0; probe <= hashmod; ++probe)
{
Item& probe_item = data[bucket];
// Element does not exist, insert here
if (eq(ItemInterface::getKey(probe_item), empty_key))
{
ItemInterface::setKey(probe_item, key);
count++;
return &probe_item;
}
// Element already exists
if (eq(ItemInterface::getKey(probe_item), key))
{
return &probe_item;
}
// Hash collision, quadratic probing
bucket = (bucket + probe + 1) & hashmod;
}
// Hash table is full - this should not happen
LUAU_ASSERT(false);
return NULL;
}
const Item* find(const Key& key) const
{
2022-09-24 03:17:25 +08:00
if (count == 0)
return 0;
if (eq(key, empty_key))
return 0;
2022-09-24 03:17:25 +08:00
size_t hashmod = capacity - 1;
size_t bucket = hasher(key) & hashmod;
for (size_t probe = 0; probe <= hashmod; ++probe)
{
const Item& probe_item = data[bucket];
// Element exists
if (eq(ItemInterface::getKey(probe_item), key))
return &probe_item;
// Element does not exist
if (eq(ItemInterface::getKey(probe_item), empty_key))
return NULL;
// Hash collision, quadratic probing
bucket = (bucket + probe + 1) & hashmod;
}
// Hash table is full - this should not happen
LUAU_ASSERT(false);
return NULL;
}
void rehash()
{
2022-09-24 03:17:25 +08:00
size_t newsize = capacity == 0 ? 16 : capacity * 2;
DenseHashTable newtable(empty_key, newsize);
2022-09-24 03:17:25 +08:00
for (size_t i = 0; i < capacity; ++i)
{
const Key& key = ItemInterface::getKey(data[i]);
if (!eq(key, empty_key))
{
Item* item = newtable.insert_unsafe(key);
*item = std::move(data[i]);
}
}
LUAU_ASSERT(count == newtable.count);
2022-09-24 03:17:25 +08:00
std::swap(data, newtable.data);
std::swap(capacity, newtable.capacity);
}
void rehash_if_full(const Key& key)
{
if (count >= capacity * 3 / 4 && !find(key))
{
rehash();
}
}
const_iterator begin() const
{
size_t start = 0;
2022-09-24 03:17:25 +08:00
while (start < capacity && eq(ItemInterface::getKey(data[start]), empty_key))
start++;
return const_iterator(this, start);
}
const_iterator end() const
{
2022-09-24 03:17:25 +08:00
return const_iterator(this, capacity);
}
2022-04-15 07:57:43 +08:00
iterator begin()
{
size_t start = 0;
2022-09-24 03:17:25 +08:00
while (start < capacity && eq(ItemInterface::getKey(data[start]), empty_key))
2022-04-15 07:57:43 +08:00
start++;
return iterator(this, start);
}
iterator end()
{
2022-09-24 03:17:25 +08:00
return iterator(this, capacity);
2022-04-15 07:57:43 +08:00
}
size_t size() const
{
return count;
}
class const_iterator
{
public:
using value_type = Item;
using reference = Item&;
using pointer = Item*;
Sync to upstream/release/607 (#1131) # What's changed? * Fix up the `std::iterator_traits` definitions for some Luau data structures. * Replace some of the usages of `std::unordered_set` and `std::unordered_map` with Luau-provided data structures to increase performance and reduce overall number of heap allocations. * Update some of the documentation links in comments throughout the codebase to correctly point to the moved repository. * Expanded JSON encoder for AST to support singleton types. * Fixed a bug in `luau-analyze` where exceptions in the last module being checked during multithreaded analysis would not be rethrown. ### New type solver * Introduce a `refine` type family to handle deferred refinements during type inference, replacing the old `RefineConstraint`. * Continued work on the implementation of type states, fixing some known bugs/blockers. * Added support for variadic functions in new non-strict mode, enabling broader support for builtins and the Roblox API. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-12-16 05:29:06 +08:00
using difference_type = ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
const_iterator()
: set(0)
, index(0)
{
}
const_iterator(const DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set, size_t index)
: set(set)
, index(index)
{
}
const Item& operator*() const
{
return set->data[index];
}
const Item* operator->() const
{
return &set->data[index];
}
bool operator==(const const_iterator& other) const
{
return set == other.set && index == other.index;
}
bool operator!=(const const_iterator& other) const
{
return set != other.set || index != other.index;
}
const_iterator& operator++()
{
2022-09-24 03:17:25 +08:00
size_t size = set->capacity;
do
{
index++;
} while (index < size && set->eq(ItemInterface::getKey(set->data[index]), set->empty_key));
return *this;
}
const_iterator operator++(int)
{
const_iterator res = *this;
++*this;
return res;
}
private:
const DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set;
size_t index;
};
2022-04-15 07:57:43 +08:00
class iterator
{
public:
Sync to upstream/release/607 (#1131) # What's changed? * Fix up the `std::iterator_traits` definitions for some Luau data structures. * Replace some of the usages of `std::unordered_set` and `std::unordered_map` with Luau-provided data structures to increase performance and reduce overall number of heap allocations. * Update some of the documentation links in comments throughout the codebase to correctly point to the moved repository. * Expanded JSON encoder for AST to support singleton types. * Fixed a bug in `luau-analyze` where exceptions in the last module being checked during multithreaded analysis would not be rethrown. ### New type solver * Introduce a `refine` type family to handle deferred refinements during type inference, replacing the old `RefineConstraint`. * Continued work on the implementation of type states, fixing some known bugs/blockers. * Added support for variadic functions in new non-strict mode, enabling broader support for builtins and the Roblox API. ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-12-16 05:29:06 +08:00
using value_type = MutableItem;
using reference = MutableItem&;
using pointer = MutableItem*;
using difference_type = ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
2022-04-15 07:57:43 +08:00
iterator()
: set(0)
, index(0)
{
}
iterator(DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set, size_t index)
: set(set)
, index(index)
{
}
MutableItem& operator*() const
{
return *reinterpret_cast<MutableItem*>(&set->data[index]);
}
MutableItem* operator->() const
{
return reinterpret_cast<MutableItem*>(&set->data[index]);
}
bool operator==(const iterator& other) const
{
return set == other.set && index == other.index;
}
bool operator!=(const iterator& other) const
{
return set != other.set || index != other.index;
}
iterator& operator++()
{
2022-09-24 03:17:25 +08:00
size_t size = set->capacity;
2022-04-15 07:57:43 +08:00
do
{
index++;
} while (index < size && set->eq(ItemInterface::getKey(set->data[index]), set->empty_key));
return *this;
}
iterator operator++(int)
{
iterator res = *this;
++*this;
return res;
}
private:
DenseHashTable<Key, Item, MutableItem, ItemInterface, Hash, Eq>* set;
size_t index;
};
private:
2022-09-24 03:17:25 +08:00
Item* data;
size_t capacity;
size_t count;
Key empty_key;
Hash hasher;
Eq eq;
};
// Slot helpers for set-like tables, where each slot is the key itself
template<typename Key>
struct ItemInterfaceSet
{
    // The slot is the key
    static const Key& getKey(const Key& item)
    {
        return item;
    }

    static void setKey(Key& item, const Key& key)
    {
        item = key;
    }

    // Copy-constructs `key` into `count` slots of raw (unconstructed) storage starting at `data`
    static void fill(Key* data, size_t count, const Key& key)
    {
        for (size_t i = 0; i < count; ++i)
            new (&data[i]) Key(key);
    }

    // Runs the destructor of `count` slots starting at `data`; does not free the storage
    static void destroy(Key* data, size_t count)
    {
        for (size_t i = 0; i < count; ++i)
            data[i].~Key();
    }
};
template<typename Key, typename Value>
struct ItemInterfaceMap
{
static const Key& getKey(const std::pair<Key, Value>& item)
{
return item.first;
}
static void setKey(std::pair<Key, Value>& item, const Key& key)
{
item.first = key;
}
2022-09-24 03:17:25 +08:00
static void fill(std::pair<Key, Value>* data, size_t count, const Key& key)
{
for (size_t i = 0; i < count; ++i)
{
new (&data[i].first) Key(key);
new (&data[i].second) Value();
}
}
static void destroy(std::pair<Key, Value>* data, size_t count)
{
2022-09-24 03:17:25 +08:00
for (size_t i = 0; i < count; ++i)
{
data[i].first.~Key();
data[i].second.~Value();
}
}
};
} // namespace detail
// This is a faster alternative of unordered_set, but it does not implement the same interface (i.e. it does not support erasing)
template<typename Key, typename Hash = detail::DenseHashDefault<Key>, typename Eq = std::equal_to<Key>>
class DenseHashSet
{
typedef detail::DenseHashTable<Key, Key, Key, detail::ItemInterfaceSet<Key>, Hash, Eq> Impl;
Impl impl;
public:
typedef typename Impl::const_iterator const_iterator;
2022-04-15 07:57:43 +08:00
typedef typename Impl::iterator iterator;
explicit DenseHashSet(const Key& empty_key, size_t buckets = 0)
: impl(empty_key, buckets)
{
}
void clear()
{
impl.clear();
}
const Key& insert(const Key& key)
{
impl.rehash_if_full(key);
return *impl.insert_unsafe(key);
}
const Key* find(const Key& key) const
{
return impl.find(key);
}
bool contains(const Key& key) const
{
return impl.find(key) != 0;
}
size_t size() const
{
return impl.size();
}
bool empty() const
{
return impl.size() == 0;
}
const_iterator begin() const
{
return impl.begin();
}
const_iterator end() const
{
return impl.end();
}
2022-04-15 07:57:43 +08:00
iterator begin()
{
return impl.begin();
}
iterator end()
{
return impl.end();
}
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. 
## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-11 05:10:07 +08:00
bool operator==(const DenseHashSet<Key, Hash, Eq>& other) const
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. 
## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-11 05:10:07 +08:00
{
if (size() != other.size())
return false;
for (const Key& k : *this)
{
if (!other.contains(k))
return false;
}
return true;
}
bool operator!=(const DenseHashSet<Key, Hash, Eq>& other) const
Sync to upstream/release/603 (#1097) # What's changed? - Record the location of properties for table types (closes #802) - Implement stricter UTF-8 validations as per the RFC (https://github.com/luau-lang/rfcs/pull/1) - Implement `buffer` as a new type in both the old and new solvers. - Changed errors produced by some `buffer` builtins to be a bit more generic to avoid platform-dependent error messages. - Fixed a bug where `Unifier` would copy some persistent types, tripping some internal assertions. - Type checking rules on relational operators is now a little bit more lax. - Improve dead code elimination for some `if` statements with complex always-false conditions ## New type solver - Dataflow analysis now generates phi nodes on exit of branches. - Dataflow analysis avoids producing a new definition for locals or properties that are not owned by that loop. - If a function parameter has been constrained to `never`, report errors at all uses of that parameter within that function. - Switch to using the new `Luau::Set` to replace `std::unordered_set` to alleviate some poor allocation characteristics which was negatively affecting overall performance. - Subtyping can now report many failing reasons instead of just the first one that we happened to find during the test. - Subtyping now also report reasons for type pack mismatches. - When visiting `if` statements or expressions, the resulting context are the common terms in both branches. ## Native codegen - Implement support for `buffer` builtins to its IR for x64 and A64. - Optimized `table.insert` by not inserting a table barrier if it is fastcalled with a constant. 
## Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Arseny Kapoulkine <arseny@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com>
2023-11-11 05:10:07 +08:00
{
return !(*this == other);
}
};
// This is a faster alternative of unordered_map, but it does not implement the same interface (i.e. it does not support erasing and has
// contains() instead of find())
template<typename Key, typename Value, typename Hash = detail::DenseHashDefault<Key>, typename Eq = std::equal_to<Key>>
class DenseHashMap
{
typedef detail::DenseHashTable<Key, std::pair<Key, Value>, std::pair<const Key, Value>, detail::ItemInterfaceMap<Key, Value>, Hash, Eq> Impl;
Impl impl;
public:
typedef typename Impl::const_iterator const_iterator;
2022-04-15 07:57:43 +08:00
typedef typename Impl::iterator iterator;
explicit DenseHashMap(const Key& empty_key, size_t buckets = 0)
: impl(empty_key, buckets)
{
}
void clear()
{
impl.clear();
}
// Note: this reference is invalidated by any insert operation (i.e. operator[])
Value& operator[](const Key& key)
{
impl.rehash_if_full(key);
return impl.insert_unsafe(key)->second;
}
// Note: this pointer is invalidated by any insert operation (i.e. operator[])
const Value* find(const Key& key) const
{
const std::pair<Key, Value>* result = impl.find(key);
return result ? &result->second : NULL;
}
// Note: this pointer is invalidated by any insert operation (i.e. operator[])
Value* find(const Key& key)
{
const std::pair<Key, Value>* result = impl.find(key);
return result ? const_cast<Value*>(&result->second) : NULL;
}
bool contains(const Key& key) const
{
return impl.find(key) != 0;
}
std::pair<Value&, bool> try_insert(const Key& key, const Value& value)
{
impl.rehash_if_full(key);
size_t before = impl.size();
std::pair<Key, Value>* slot = impl.insert_unsafe(key);
// Value is fresh if container count has increased
bool fresh = impl.size() > before;
if (fresh)
slot->second = value;
return std::make_pair(std::ref(slot->second), fresh);
}
size_t size() const
{
return impl.size();
}
bool empty() const
{
return impl.size() == 0;
}
const_iterator begin() const
{
return impl.begin();
}
2022-04-15 07:57:43 +08:00
const_iterator end() const
{
return impl.end();
}
2022-04-15 07:57:43 +08:00
iterator begin()
{
return impl.begin();
}
iterator end()
{
return impl.end();
}
};
} // namespace Luau