mirror of
https://github.com/luau-lang/luau.git
synced 2024-11-15 14:25:44 +08:00
443903aa00
# What's changed? * Luau allocation scheme was changed to handle allocations in 513-1024 byte range internally without falling back to global allocator * coroutine/thread creation no longer requires any global allocations, making it up to 15% faster (vs libc malloc) * table construction for 17-32 keys or 33-64 array elements is up to 30% faster (vs libc malloc) ### New Type Solver * Cyclic unary negation type families are reduced to `number` when possible * Class types are skipped when searching for free types in unifier to improve performance * Fixed issues with table type inference when metatables are present * Improved inference of iteration loop types * Fixed an issue with bidirectional inference of method calls * Type simplification will now preserve error suppression markers ### Native Code Generation * Fixed TAG_VECTOR skip optimization to not break instruction use counts (broken optimization wasn't included in 614) * Fixed missing side-effect when optimizing generic loop preparation instruction --- ### Internal Contributors Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com> Co-authored-by: Vyacheslav Egorov <vegorov@roblox.com> --------- Co-authored-by: Aaron Weiss <aaronweiss@roblox.com> Co-authored-by: Alexander McCord <amccord@roblox.com> Co-authored-by: Andy Friesen <afriesen@roblox.com> Co-authored-by: Vighnesh <vvijay@roblox.com> Co-authored-by: Aviral Goel <agoel@roblox.com> Co-authored-by: David Cope <dcope@roblox.com> Co-authored-by: Lily Brown <lbrown@roblox.com>
1556 lines
53 KiB
Lua
1556 lines
53 KiB
Lua
--[=[------------------------------------------------------------------------------------------------------------------------
-- HashLib by Egor Skriptunoff, boatbomber, and howmanysmall

--------------------------------------------------------------------------------------------------------------------------

Module was originally written by Egor Skriptunoff and distributed under an MIT license.
It can be found here: https://github.com/Egor-Skriptunoff/pure_lua_SHA/blob/master/sha2.lua

That version was around 3000 lines long, and supported Lua versions 5.1, 5.2, 5.3, and 5.4, and LuaJIT.
Although that is super cool, Roblox only uses Lua 5.1, so that was extreme overkill.

I, boatbomber, worked to port it to Roblox in a way that doesn't overcomplicate it with support of unreachable
cases. Then, howmanysmall did some final optimizations that really squeeze out all the performance possible.
It's gotten stupid fast, thanks to her!

After quite a bit of work and benchmarking, this is what we were left with.
Enjoy!

--------------------------------------------------------------------------------------------------------------------------

DESCRIPTION:
	This module contains functions to calculate the following digests:
		MD5, SHA-1,
		SHA-224, SHA-256, SHA-512/224, SHA-512/256, SHA-384, SHA-512,
		SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256,
		HMAC
	Additionally, it has a few extra utility functions:
		hex_to_bin
		base64_to_bin
		bin_to_base64
	Written in pure Lua.
USAGE:
	Input data should be a string
	Result (the digest) is returned in hexadecimal representation as a string of lowercase hex digits.
	Simplest usage example:
		local HashLib = require(script.HashLib)
		local your_hash = HashLib.sha256("your string")
API:
		HashLib.md5
		HashLib.sha1
	SHA2 hash functions:
		HashLib.sha224
		HashLib.sha256
		HashLib.sha512_224
		HashLib.sha512_256
		HashLib.sha384
		HashLib.sha512
	SHA3 hash functions:
		HashLib.sha3_224
		HashLib.sha3_256
		HashLib.sha3_384
		HashLib.sha3_512
		HashLib.shake128
		HashLib.shake256
	Misc utilities:
		HashLib.hmac (Applicable to any hash function from this module except SHAKE*)
		HashLib.hex_to_bin
		HashLib.base64_to_bin
		HashLib.bin_to_base64

--]=]
|
|
---------------------------------------------------------------------------
|
|
|
|
local Base64 = require(script.Base64)
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- LOCALIZATION FOR VM OPTIMIZATIONS
|
|
--------------------------------------------------------------------------------
|
|
|
|
local ipairs = ipairs
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- 32-BIT BITWISE FUNCTIONS
|
|
--------------------------------------------------------------------------------
|
|
-- Only low 32 bits of function arguments matter, high bits are ignored
|
|
-- The result of all functions (except HEX) is an integer inside "correct range":
|
|
-- for "bit" library: (-TWO_POW_31)..(TWO_POW_31-1)
|
|
-- for "bit32" library: 0..(TWO_POW_32-1)
|
|
-- Local aliases for the bit32 library functions used by the hashing loops below.
-- Keeping them in locals avoids a global + field lookup on every call in the hot loops.
local bit32_band = bit32.band -- 2 arguments
local bit32_bor = bit32.bor -- 2 arguments
local bit32_bxor = bit32.bxor -- 2..5 arguments
local bit32_lshift = bit32.lshift -- second argument is integer 0..31
local bit32_rshift = bit32.rshift -- second argument is integer 0..31
local bit32_lrotate = bit32.lrotate -- second argument is integer 0..31
local bit32_rrotate = bit32.rrotate -- second argument is integer 0..31
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- CREATING OPTIMIZED INNER LOOP
|
|
--------------------------------------------------------------------------------
|
|
-- Arrays of SHA2 "magic numbers" (in "INT64" and "FFI" branches "*_lo" arrays contain 64-bit values)
|
|
-- Shared constant tables. They are created empty here; nothing in this block fills them.
local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi, sha3_RC_lo, sha3_RC_hi = {}, {}, {}, {}, {}, {}

-- Initial-state tables keyed by digest width. The [256] / [512] entries alias the
-- sha2_H_* tables above (same table object, not a copy).
local sha2_H_ext256 = {
	[224] = {},
	[256] = sha2_H_hi,
}

local sha2_H_ext512_lo, sha2_H_ext512_hi = {
	[384] = {},
	[512] = sha2_H_lo,
}, {
	[384] = {},
	[512] = sha2_H_hi,
}

local md5_K, md5_sha1_H = {}, { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }

-- md5_next_shift[s] is the right-rotation amount that follows s inside md5_feed_64.
-- Each MD5 round cycles through four amounts:
--   25 -> 20 -> 15 -> 10 -> 25,  27 -> 23 -> 18 -> 12 -> 27,
--   28 -> 21 -> 16 ->  9 -> 28,  26 -> 22 -> 17 -> 11 -> 26
-- Indices that are never used as a shift hold 0.
local md5_next_shift = {
	0, 0, 0, 0, 0, 0, 0, 0, -- [1..8]
	28, 25, 26, 27, 0, 0, -- [9..14]
	10, 9, 11, 12, 0, -- [15..19]
	15, 16, 17, 18, 0, -- [20..24]
	20, 22, 23, 21, -- [25..28]
}

local HEX64, XOR64A5, lanes_index_base -- defined only for branches that internally use 64-bit integers: "INT64" and "FFI"
local common_W = {} -- temporary table shared between all calculations (to avoid creating new temporary table every time)
local K_lo_modulo, hi_factor, hi_factor_keccak = 4294967296, 0, 0

-- Powers of two used as multipliers / divisors to emulate shifts with plain arithmetic.
local TWO_POW_NEG_56 = 2 ^ -56
local TWO_POW_NEG_17 = 2 ^ -17

local TWO_POW_2 = 2 ^ 2
local TWO_POW_3 = 2 ^ 3
local TWO_POW_4 = 2 ^ 4
local TWO_POW_5 = 2 ^ 5
local TWO_POW_6 = 2 ^ 6
local TWO_POW_7 = 2 ^ 7
local TWO_POW_8 = 2 ^ 8
local TWO_POW_9 = 2 ^ 9
local TWO_POW_10 = 2 ^ 10
local TWO_POW_11 = 2 ^ 11
local TWO_POW_12 = 2 ^ 12
local TWO_POW_13 = 2 ^ 13
local TWO_POW_14 = 2 ^ 14
local TWO_POW_15 = 2 ^ 15
local TWO_POW_16 = 2 ^ 16
local TWO_POW_17 = 2 ^ 17
local TWO_POW_18 = 2 ^ 18
local TWO_POW_19 = 2 ^ 19
local TWO_POW_20 = 2 ^ 20
local TWO_POW_21 = 2 ^ 21
local TWO_POW_22 = 2 ^ 22
local TWO_POW_23 = 2 ^ 23
local TWO_POW_24 = 2 ^ 24
local TWO_POW_25 = 2 ^ 25
local TWO_POW_26 = 2 ^ 26
local TWO_POW_27 = 2 ^ 27
local TWO_POW_28 = 2 ^ 28
local TWO_POW_29 = 2 ^ 29
local TWO_POW_30 = 2 ^ 30
local TWO_POW_31 = 2 ^ 31
local TWO_POW_32 = 2 ^ 32
local TWO_POW_40 = 2 ^ 40

local TWO56_POW_7 = 256 ^ 7
|
|
|
|
-- Implementation for Lua 5.1/5.2 (with or without bitwise library available)
|
|
-- Runs the SHA-256 compression loop over `size` bytes of `str`, starting after byte offset `offs`.
--   H    : table holding the eight 32-bit state words in H[1..8]; updated in place
--   str  : input string
--   offs : number of bytes of `str` to skip (offs >= 0)
--   size : number of bytes to process (size >= 0, multiple of 64)
-- Round constants are read from sha2_K_hi and the message schedule lives in the shared common_W table.
-- Intermediate sums are deliberately left unreduced: the bit32 functions only look at the low 32 bits
-- of their arguments, and the state is reduced modulo 2^32 once per 64-byte block.
local function sha256_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W, K = common_W, sha2_K_hi
	local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
	for pos = offs, offs + size - 1, 64 do
		-- Load sixteen big-endian 32-bit words. `pos` is advanced locally here; this relies on the
		-- numeric-for control variable being a per-iteration copy (Lua 5.1 / Luau semantics).
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		-- Extend the schedule to 64 words (left rotations stand in for right rotations by 32 - n).
		for j = 17, 64 do
			local a, b = W[j - 15], W[j - 2]
			W[j] = bit32_bxor(bit32_rrotate(a, 7), bit32_lrotate(a, 14), bit32_rshift(a, 3))
				+ bit32_bxor(bit32_lrotate(b, 15), bit32_lrotate(b, 13), bit32_rshift(b, 10))
				+ W[j - 7]
				+ W[j - 16]
		end

		local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
		for j = 1, 64 do
			-- (-1 - e) is the bitwise complement of e in the low 32 bits.
			local z = bit32_bxor(bit32_rrotate(e, 6), bit32_rrotate(e, 11), bit32_lrotate(e, 7))
				+ bit32_band(e, f)
				+ bit32_band(-1 - e, g)
				+ h
				+ K[j]
				+ W[j]
			h = g
			g = f
			f = e
			e = z + d
			d = c
			c = b
			b = a
			-- At this point d, c, b hold the previous c, b, a; `a` still holds its previous value.
			a = z
				+ bit32_band(d, c)
				+ bit32_band(a, bit32_bxor(d, c))
				+ bit32_bxor(bit32_rrotate(a, 2), bit32_rrotate(a, 13), bit32_lrotate(a, 10))
		end

		h1, h2, h3, h4 = (a + h1) % 4294967296, (b + h2) % 4294967296, (c + h3) % 4294967296, (d + h4) % 4294967296
		h5, h6, h7, h8 = (e + h5) % 4294967296, (f + h6) % 4294967296, (g + h7) % 4294967296, (h + h8) % 4294967296
	end

	H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
end
|
|
|
|
-- Runs the SHA-512 compression loop over `size` bytes of `str`, starting after byte offset `offs`.
-- Every 64-bit quantity is kept as two 32-bit halves ("_lo" / "_hi") stored in ordinary numbers.
--   H_lo, H_hi : tables holding the low / high halves of the eight state words in [1..8]; updated in place
--   str        : input string
--   offs       : number of bytes of `str` to skip (offs >= 0)
--   size       : number of bytes to process (size >= 0, multiple of 128)
-- Round constants come from sha2_K_lo / sha2_K_hi; the schedule lives in the shared common_W table.
-- Carries from a low half into the high half are computed as (sum - sum % 2^32) / 2^32.
local function sha512_feed_128(H_lo, H_hi, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 128
	-- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
	local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
	local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo =
		H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
	local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi =
		H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
	for pos = offs, offs + size - 1, 128 do
		-- Load thirty-two big-endian 32-bit words (sixteen 64-bit words, high half first).
		-- `pos` is advanced locally; the outer loop's iteration sequence is not affected
		-- (numeric-for control variable is a per-iteration copy in Lua 5.1 / Luau).
		for j = 1, 16 * 2 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		-- Extend the schedule; jj indexes the low half of the word being produced, jj - 1 its high half.
		for jj = 34, 160, 2 do
			local a_lo, a_hi, b_lo, b_hi = W[jj - 30], W[jj - 31], W[jj - 4], W[jj - 5]
			local tmp1 = bit32_bxor(
				bit32_rshift(a_lo, 1) + bit32_lshift(a_hi, 31),
				bit32_rshift(a_lo, 8) + bit32_lshift(a_hi, 24),
				bit32_rshift(a_lo, 7) + bit32_lshift(a_hi, 25)
			) % 4294967296 + bit32_bxor(
				bit32_rshift(b_lo, 19) + bit32_lshift(b_hi, 13),
				bit32_lshift(b_lo, 3) + bit32_rshift(b_hi, 29),
				bit32_rshift(b_lo, 6) + bit32_lshift(b_hi, 26)
			) % 4294967296 + W[jj - 14] + W[jj - 32]

			local tmp2 = tmp1 % 4294967296
			W[jj - 1] = bit32_bxor(
				bit32_rshift(a_hi, 1) + bit32_lshift(a_lo, 31),
				bit32_rshift(a_hi, 8) + bit32_lshift(a_lo, 24),
				bit32_rshift(a_hi, 7)
			) + bit32_bxor(
				bit32_rshift(b_hi, 19) + bit32_lshift(b_lo, 13),
				bit32_lshift(b_hi, 3) + bit32_rshift(b_lo, 29),
				bit32_rshift(b_hi, 6)
			) + W[jj - 15] + W[jj - 33] + (tmp1 - tmp2) / 4294967296

			W[jj] = tmp2
		end

		local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
		local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
		for j = 1, 80 do
			local jj = 2 * j
			local tmp1 = bit32_bxor(
				bit32_rshift(e_lo, 14) + bit32_lshift(e_hi, 18),
				bit32_rshift(e_lo, 18) + bit32_lshift(e_hi, 14),
				bit32_lshift(e_lo, 23) + bit32_rshift(e_hi, 9)
			) % 4294967296 + (bit32_band(e_lo, f_lo) + bit32_band(-1 - e_lo, g_lo)) % 4294967296 + h_lo + K_lo[j] + W[jj]

			local z_lo = tmp1 % 4294967296
			local z_hi = bit32_bxor(
				bit32_rshift(e_hi, 14) + bit32_lshift(e_lo, 18),
				bit32_rshift(e_hi, 18) + bit32_lshift(e_lo, 14),
				bit32_lshift(e_hi, 23) + bit32_rshift(e_lo, 9)
			) + bit32_band(e_hi, f_hi) + bit32_band(-1 - e_hi, g_hi) + h_hi + K_hi[j] + W[jj - 1] + (tmp1 - z_lo) / 4294967296

			h_lo = g_lo
			h_hi = g_hi
			g_lo = f_lo
			g_hi = f_hi
			f_lo = e_lo
			f_hi = e_hi
			tmp1 = z_lo + d_lo
			e_lo = tmp1 % 4294967296
			e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296
			d_lo = c_lo
			d_hi = c_hi
			c_lo = b_lo
			c_hi = b_hi
			b_lo = a_lo
			b_hi = a_hi
			-- From here d, c, b hold the previous c, b, a.
			tmp1 = z_lo
				+ (bit32_band(d_lo, c_lo) + bit32_band(b_lo, bit32_bxor(d_lo, c_lo))) % 4294967296
				+ bit32_bxor(
						bit32_rshift(b_lo, 28) + bit32_lshift(b_hi, 4),
						bit32_lshift(b_lo, 30) + bit32_rshift(b_hi, 2),
						bit32_lshift(b_lo, 25) + bit32_rshift(b_hi, 7)
					)
					% 4294967296
			a_lo = tmp1 % 4294967296
			a_hi = z_hi
				+ (bit32_band(d_hi, c_hi) + bit32_band(b_hi, bit32_bxor(d_hi, c_hi)))
				+ bit32_bxor(
					bit32_rshift(b_hi, 28) + bit32_lshift(b_lo, 4),
					bit32_lshift(b_hi, 30) + bit32_rshift(b_lo, 2),
					bit32_lshift(b_hi, 25) + bit32_rshift(b_lo, 7)
				)
				+ (tmp1 - a_lo) / 4294967296
		end

		-- Add the working variables back into the state, propagating the carry from lo to hi.
		-- a_lo is reused as a scratch variable for the unreduced low-half sum.
		a_lo = h1_lo + a_lo
		h1_lo = a_lo % 4294967296
		h1_hi = (h1_hi + a_hi + (a_lo - h1_lo) / 4294967296) % 4294967296
		a_lo = h2_lo + b_lo
		h2_lo = a_lo % 4294967296
		h2_hi = (h2_hi + b_hi + (a_lo - h2_lo) / 4294967296) % 4294967296
		a_lo = h3_lo + c_lo
		h3_lo = a_lo % 4294967296
		h3_hi = (h3_hi + c_hi + (a_lo - h3_lo) / 4294967296) % 4294967296
		a_lo = h4_lo + d_lo
		h4_lo = a_lo % 4294967296
		h4_hi = (h4_hi + d_hi + (a_lo - h4_lo) / 4294967296) % 4294967296
		a_lo = h5_lo + e_lo
		h5_lo = a_lo % 4294967296
		h5_hi = (h5_hi + e_hi + (a_lo - h5_lo) / 4294967296) % 4294967296
		a_lo = h6_lo + f_lo
		h6_lo = a_lo % 4294967296
		h6_hi = (h6_hi + f_hi + (a_lo - h6_lo) / 4294967296) % 4294967296
		a_lo = h7_lo + g_lo
		h7_lo = a_lo % 4294967296
		h7_hi = (h7_hi + g_hi + (a_lo - h7_lo) / 4294967296) % 4294967296
		a_lo = h8_lo + h_lo
		h8_lo = a_lo % 4294967296
		h8_hi = (h8_hi + h_hi + (a_lo - h8_lo) / 4294967296) % 4294967296
	end

	H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] =
		h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
	H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] =
		h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
end
|
|
|
|
-- Runs the MD5 compression loop over `size` bytes of `str`, starting after byte offset `offs`.
--   H    : table holding the four 32-bit state words in H[1..4]; updated in place
--   str  : input string
--   offs : number of bytes of `str` to skip (offs >= 0)
--   size : number of bytes to process (size >= 0, multiple of 64)
-- Per-step constants are read from md5_K. Rotations are expressed as right rotations by `s`;
-- md5_next_shift[s] yields the amount for the following step within the same round.
local function md5_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W, K, md5_next_shift = common_W, md5_K, md5_next_shift
	local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
	for pos = offs, offs + size - 1, 64 do
		-- Load sixteen little-endian 32-bit words. `pos` is advanced locally; the outer loop's
		-- iteration sequence is not affected (Lua 5.1 / Luau numeric-for semantics).
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((d * 256 + c) * 256 + b) * 256 + a
		end

		local a, b, c, d = h1, h2, h3, h4
		local s = 25
		for j = 1, 16 do
			-- (-1 - b) is the bitwise complement of b in the low 32 bits.
			local F = bit32_rrotate(bit32_band(b, c) + bit32_band(-1 - b, d) + a + K[j] + W[j], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		s = 27
		for j = 17, 32 do
			local F = bit32_rrotate(bit32_band(d, b) + bit32_band(-1 - d, c) + a + K[j] + W[(5 * j - 4) % 16 + 1], s)
				+ b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		s = 28
		for j = 33, 48 do
			local F = bit32_rrotate(bit32_bxor(bit32_bxor(b, c), d) + a + K[j] + W[(3 * j + 2) % 16 + 1], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		s = 26
		for j = 49, 64 do
			local F = bit32_rrotate(bit32_bxor(c, bit32_bor(b, -1 - d)) + a + K[j] + W[(j * 7 - 7) % 16 + 1], s) + b
			s = md5_next_shift[s]
			a = d
			d = c
			c = b
			b = F
		end

		h1 = (a + h1) % 4294967296
		h2 = (b + h2) % 4294967296
		h3 = (c + h3) % 4294967296
		h4 = (d + h4) % 4294967296
	end

	H[1], H[2], H[3], H[4] = h1, h2, h3, h4
end
|
|
|
|
-- Runs the SHA-1 compression loop over `size` bytes of `str`, starting after byte offset `offs`.
--   H    : table holding the five 32-bit state words in H[1..5]; updated in place
--   str  : input string
--   offs : number of bytes of `str` to skip (offs >= 0)
--   size : number of bytes to process (size >= 0, multiple of 64)
-- The 80-word schedule lives in the shared common_W table. Sums are left unreduced inside the
-- rounds (the bit32 functions only use the low 32 bits) and reduced modulo 2^32 once per block.
local function sha1_feed_64(H, str, offs, size)
	-- offs >= 0, size >= 0, size is multiple of 64
	local W = common_W
	local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
	for pos = offs, offs + size - 1, 64 do
		-- Load sixteen big-endian 32-bit words. `pos` is advanced locally; the outer loop's
		-- iteration sequence is not affected (Lua 5.1 / Luau numeric-for semantics).
		for j = 1, 16 do
			pos = pos + 4
			local a, b, c, d = string.byte(str, pos - 3, pos)
			W[j] = ((a * 256 + b) * 256 + c) * 256 + d
		end

		for j = 17, 80 do
			W[j] = bit32_lrotate(bit32_bxor(W[j - 3], W[j - 8], W[j - 14], W[j - 16]), 1)
		end

		local a, b, c, d, e = h1, h2, h3, h4, h5
		for j = 1, 20 do
			local z = bit32_lrotate(a, 5) + bit32_band(b, c) + bit32_band(-1 - b, d) + 0x5A827999 + W[j] + e -- constant = math.floor(TWO_POW_30 * sqrt(2))
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 21, 40 do
			local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0x6ED9EBA1 + W[j] + e -- TWO_POW_30 * sqrt(3)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 41, 60 do
			local z = bit32_lrotate(a, 5) + bit32_band(d, c) + bit32_band(b, bit32_bxor(d, c)) + 0x8F1BBCDC + W[j] + e -- TWO_POW_30 * sqrt(5)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		for j = 61, 80 do
			local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0xCA62C1D6 + W[j] + e -- TWO_POW_30 * sqrt(10)
			e = d
			d = c
			c = bit32_rrotate(b, 2)
			b = a
			a = z
		end

		h1 = (a + h1) % 4294967296
		h2 = (b + h2) % 4294967296
		h3 = (c + h3) % 4294967296
		h4 = (d + h4) % 4294967296
		h5 = (e + h5) % 4294967296
	end

	H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5
end
|
|
|
|
-- Absorbs `size` bytes of `str` (starting after byte offset `offs`) into a 25-lane Keccak state and
-- runs 24 permutation rounds after each absorbed block.
--   lanes_lo, lanes_hi  : tables holding the low / high 32-bit halves of lanes [1..25]; updated in place
--   str                 : input string
--   offs                : number of bytes of `str` to skip (offs >= 0)
--   size                : number of bytes to process (size >= 0, multiple of block_size_in_bytes)
--   block_size_in_bytes : bytes absorbed per permutation (positive multiple of 8)
-- Round constants are read from sha3_RC_lo / sha3_RC_hi. 64-bit rotations are emulated on the
-- lo/hi halves with multiplications and divisions by powers of two; several products are left
-- unreduced because the bit32 functions they are later passed to ignore bits above the low 32.
-- The statement order inside the round is significant (values are overwritten in place).
local function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
	-- This is an example of a Lua function having 79 local variables :-)
	-- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
	local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
	local qwords_qty = block_size_in_bytes / 8
	for pos = offs, offs + size - 1, block_size_in_bytes do
		-- XOR the block into the state: each 8 input bytes form one little-endian lane (low half first).
		-- `pos` is advanced locally; the outer loop's iteration sequence is not affected
		-- (Lua 5.1 / Luau numeric-for semantics).
		for j = 1, qwords_qty do
			local a, b, c, d = string.byte(str, pos + 1, pos + 4)
			lanes_lo[j] = bit32_bxor(lanes_lo[j], ((d * 256 + c) * 256 + b) * 256 + a)
			pos = pos + 8
			a, b, c, d = string.byte(str, pos - 3, pos)
			lanes_hi[j] = bit32_bxor(lanes_hi[j], ((d * 256 + c) * 256 + b) * 256 + a)
		end

		-- Copy the whole state into locals for the duration of the permutation.
		local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi, L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi, L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =
			lanes_lo[1],
			lanes_hi[1],
			lanes_lo[2],
			lanes_hi[2],
			lanes_lo[3],
			lanes_hi[3],
			lanes_lo[4],
			lanes_hi[4],
			lanes_lo[5],
			lanes_hi[5],
			lanes_lo[6],
			lanes_hi[6],
			lanes_lo[7],
			lanes_hi[7],
			lanes_lo[8],
			lanes_hi[8],
			lanes_lo[9],
			lanes_hi[9],
			lanes_lo[10],
			lanes_hi[10],
			lanes_lo[11],
			lanes_hi[11],
			lanes_lo[12],
			lanes_hi[12],
			lanes_lo[13],
			lanes_hi[13],
			lanes_lo[14],
			lanes_hi[14],
			lanes_lo[15],
			lanes_hi[15],
			lanes_lo[16],
			lanes_hi[16],
			lanes_lo[17],
			lanes_hi[17],
			lanes_lo[18],
			lanes_hi[18],
			lanes_lo[19],
			lanes_hi[19],
			lanes_lo[20],
			lanes_hi[20],
			lanes_lo[21],
			lanes_hi[21],
			lanes_lo[22],
			lanes_hi[22],
			lanes_lo[23],
			lanes_hi[23],
			lanes_lo[24],
			lanes_hi[24],
			lanes_lo[25],
			lanes_hi[25]

		for round_idx = 1, 24 do
			-- Column parities: XOR of the five lanes in each column.
			local C1_lo = bit32_bxor(L01_lo, L06_lo, L11_lo, L16_lo, L21_lo)
			local C1_hi = bit32_bxor(L01_hi, L06_hi, L11_hi, L16_hi, L21_hi)
			local C2_lo = bit32_bxor(L02_lo, L07_lo, L12_lo, L17_lo, L22_lo)
			local C2_hi = bit32_bxor(L02_hi, L07_hi, L12_hi, L17_hi, L22_hi)
			local C3_lo = bit32_bxor(L03_lo, L08_lo, L13_lo, L18_lo, L23_lo)
			local C3_hi = bit32_bxor(L03_hi, L08_hi, L13_hi, L18_hi, L23_hi)
			local C4_lo = bit32_bxor(L04_lo, L09_lo, L14_lo, L19_lo, L24_lo)
			local C4_hi = bit32_bxor(L04_hi, L09_hi, L14_hi, L19_hi, L24_hi)
			local C5_lo = bit32_bxor(L05_lo, L10_lo, L15_lo, L20_lo, L25_lo)
			local C5_hi = bit32_bxor(L05_hi, L10_hi, L15_hi, L20_hi, L25_hi)

			-- Second column: mix in D = C1 xor (C3 rotated left by 1 across the lo/hi pair),
			-- then rotate each lane and store it in its new position.
			local D_lo = bit32_bxor(C1_lo, C3_lo * 2 + (C3_hi % TWO_POW_32 - C3_hi % TWO_POW_31) / TWO_POW_31)
			local D_hi = bit32_bxor(C1_hi, C3_hi * 2 + (C3_lo % TWO_POW_32 - C3_lo % TWO_POW_31) / TWO_POW_31)

			local T0_lo = bit32_bxor(D_lo, L02_lo)
			local T0_hi = bit32_bxor(D_hi, L02_hi)
			local T1_lo = bit32_bxor(D_lo, L07_lo)
			local T1_hi = bit32_bxor(D_hi, L07_hi)
			local T2_lo = bit32_bxor(D_lo, L12_lo)
			local T2_hi = bit32_bxor(D_hi, L12_hi)
			local T3_lo = bit32_bxor(D_lo, L17_lo)
			local T3_hi = bit32_bxor(D_hi, L17_hi)
			local T4_lo = bit32_bxor(D_lo, L22_lo)
			local T4_hi = bit32_bxor(D_hi, L22_hi)

			L02_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_20) / TWO_POW_20 + T1_hi * TWO_POW_12
			L02_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_20) / TWO_POW_20 + T1_lo * TWO_POW_12
			L07_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_19) / TWO_POW_19 + T3_hi * TWO_POW_13
			L07_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_19) / TWO_POW_19 + T3_lo * TWO_POW_13
			L12_lo = T0_lo * 2 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_31) / TWO_POW_31
			L12_hi = T0_hi * 2 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_31) / TWO_POW_31
			L17_lo = T2_lo * TWO_POW_10 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_22) / TWO_POW_22
			L17_hi = T2_hi * TWO_POW_10 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_22) / TWO_POW_22
			L22_lo = T4_lo * TWO_POW_2 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_30) / TWO_POW_30
			L22_hi = T4_hi * TWO_POW_2 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_30) / TWO_POW_30

			-- Third column.
			D_lo = bit32_bxor(C2_lo, C4_lo * 2 + (C4_hi % TWO_POW_32 - C4_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C2_hi, C4_hi * 2 + (C4_lo % TWO_POW_32 - C4_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L03_lo)
			T0_hi = bit32_bxor(D_hi, L03_hi)
			T1_lo = bit32_bxor(D_lo, L08_lo)
			T1_hi = bit32_bxor(D_hi, L08_hi)
			T2_lo = bit32_bxor(D_lo, L13_lo)
			T2_hi = bit32_bxor(D_hi, L13_hi)
			T3_lo = bit32_bxor(D_lo, L18_lo)
			T3_hi = bit32_bxor(D_hi, L18_hi)
			T4_lo = bit32_bxor(D_lo, L23_lo)
			T4_hi = bit32_bxor(D_hi, L23_hi)

			L03_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_21) / TWO_POW_21 + T2_hi * TWO_POW_11
			L03_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_21) / TWO_POW_21 + T2_lo * TWO_POW_11
			L08_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_3) / TWO_POW_3 + T4_hi * TWO_POW_29 % TWO_POW_32
			L08_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_3) / TWO_POW_3 + T4_lo * TWO_POW_29 % TWO_POW_32
			L13_lo = T1_lo * TWO_POW_6 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_26) / TWO_POW_26
			L13_hi = T1_hi * TWO_POW_6 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_26) / TWO_POW_26
			L18_lo = T3_lo * TWO_POW_15 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_17) / TWO_POW_17
			L18_hi = T3_hi * TWO_POW_15 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_17) / TWO_POW_17
			L23_lo = (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_2) / TWO_POW_2 + T0_hi * TWO_POW_30 % TWO_POW_32
			L23_hi = (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_2) / TWO_POW_2 + T0_lo * TWO_POW_30 % TWO_POW_32

			-- Fourth column.
			D_lo = bit32_bxor(C3_lo, C5_lo * 2 + (C5_hi % TWO_POW_32 - C5_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C3_hi, C5_hi * 2 + (C5_lo % TWO_POW_32 - C5_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L04_lo)
			T0_hi = bit32_bxor(D_hi, L04_hi)
			T1_lo = bit32_bxor(D_lo, L09_lo)
			T1_hi = bit32_bxor(D_hi, L09_hi)
			T2_lo = bit32_bxor(D_lo, L14_lo)
			T2_hi = bit32_bxor(D_hi, L14_hi)
			T3_lo = bit32_bxor(D_lo, L19_lo)
			T3_hi = bit32_bxor(D_hi, L19_hi)
			T4_lo = bit32_bxor(D_lo, L24_lo)
			T4_hi = bit32_bxor(D_hi, L24_hi)

			L04_lo = T3_lo * TWO_POW_21 % TWO_POW_32 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_11) / TWO_POW_11
			L04_hi = T3_hi * TWO_POW_21 % TWO_POW_32 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_11) / TWO_POW_11
			L09_lo = T0_lo * TWO_POW_28 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_4) / TWO_POW_4
			L09_hi = T0_hi * TWO_POW_28 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_4) / TWO_POW_4
			L14_lo = T2_lo * TWO_POW_25 % TWO_POW_32 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_7) / TWO_POW_7
			L14_hi = T2_hi * TWO_POW_25 % TWO_POW_32 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_7) / TWO_POW_7
			L19_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_8) / TWO_POW_8 + T4_hi * TWO_POW_24 % TWO_POW_32
			L19_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_8) / TWO_POW_8 + T4_lo * TWO_POW_24 % TWO_POW_32
			L24_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_9) / TWO_POW_9 + T1_hi * TWO_POW_23 % TWO_POW_32
			L24_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_9) / TWO_POW_9 + T1_lo * TWO_POW_23 % TWO_POW_32

			-- Fifth column.
			D_lo = bit32_bxor(C4_lo, C1_lo * 2 + (C1_hi % TWO_POW_32 - C1_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C4_hi, C1_hi * 2 + (C1_lo % TWO_POW_32 - C1_lo % TWO_POW_31) / TWO_POW_31)

			T0_lo = bit32_bxor(D_lo, L05_lo)
			T0_hi = bit32_bxor(D_hi, L05_hi)
			T1_lo = bit32_bxor(D_lo, L10_lo)
			T1_hi = bit32_bxor(D_hi, L10_hi)
			T2_lo = bit32_bxor(D_lo, L15_lo)
			T2_hi = bit32_bxor(D_hi, L15_hi)
			T3_lo = bit32_bxor(D_lo, L20_lo)
			T3_hi = bit32_bxor(D_hi, L20_hi)
			T4_lo = bit32_bxor(D_lo, L25_lo)
			T4_hi = bit32_bxor(D_hi, L25_hi)

			L05_lo = T4_lo * TWO_POW_14 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_18) / TWO_POW_18
			L05_hi = T4_hi * TWO_POW_14 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_18) / TWO_POW_18
			L10_lo = T1_lo * TWO_POW_20 % TWO_POW_32 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_12) / TWO_POW_12
			L10_hi = T1_hi * TWO_POW_20 % TWO_POW_32 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_12) / TWO_POW_12
			L15_lo = T3_lo * TWO_POW_8 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_24) / TWO_POW_24
			L15_hi = T3_hi * TWO_POW_8 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_24) / TWO_POW_24
			L20_lo = T0_lo * TWO_POW_27 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_5) / TWO_POW_5
			L20_hi = T0_hi * TWO_POW_27 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_5) / TWO_POW_5
			L25_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_25) / TWO_POW_25 + T2_hi * TWO_POW_7
			L25_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_25) / TWO_POW_25 + T2_lo * TWO_POW_7

			-- First column; L01 itself is only XORed with D (no rotation, no move).
			D_lo = bit32_bxor(C5_lo, C2_lo * 2 + (C2_hi % TWO_POW_32 - C2_hi % TWO_POW_31) / TWO_POW_31)
			D_hi = bit32_bxor(C5_hi, C2_hi * 2 + (C2_lo % TWO_POW_32 - C2_lo % TWO_POW_31) / TWO_POW_31)

			T1_lo = bit32_bxor(D_lo, L06_lo)
			T1_hi = bit32_bxor(D_hi, L06_hi)
			T2_lo = bit32_bxor(D_lo, L11_lo)
			T2_hi = bit32_bxor(D_hi, L11_hi)
			T3_lo = bit32_bxor(D_lo, L16_lo)
			T3_hi = bit32_bxor(D_hi, L16_hi)
			T4_lo = bit32_bxor(D_lo, L21_lo)
			T4_hi = bit32_bxor(D_hi, L21_hi)

			L06_lo = T2_lo * TWO_POW_3 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_29) / TWO_POW_29
			L06_hi = T2_hi * TWO_POW_3 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_29) / TWO_POW_29
			L11_lo = T4_lo * TWO_POW_18 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_14) / TWO_POW_14
			L11_hi = T4_hi * TWO_POW_18 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_14) / TWO_POW_14
			L16_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_28) / TWO_POW_28 + T1_hi * TWO_POW_4
			L16_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_28) / TWO_POW_28 + T1_lo * TWO_POW_4
			L21_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_23) / TWO_POW_23 + T3_hi * TWO_POW_9
			L21_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_23) / TWO_POW_23 + T3_lo * TWO_POW_9

			L01_lo = bit32_bxor(D_lo, L01_lo)
			L01_hi = bit32_bxor(D_hi, L01_hi)

			-- Non-linear step, one row of five lanes at a time. Multiple assignment evaluates every
			-- right-hand side before storing, so all inputs are the pre-step values.
			-- (-1 - x) is the bitwise complement of x in the low 32 bits.
			L01_lo, L02_lo, L03_lo, L04_lo, L05_lo =
				bit32_bxor(L01_lo, bit32_band(-1 - L02_lo, L03_lo)),
				bit32_bxor(L02_lo, bit32_band(-1 - L03_lo, L04_lo)),
				bit32_bxor(L03_lo, bit32_band(-1 - L04_lo, L05_lo)),
				bit32_bxor(L04_lo, bit32_band(-1 - L05_lo, L01_lo)),
				bit32_bxor(L05_lo, bit32_band(-1 - L01_lo, L02_lo))
			L01_hi, L02_hi, L03_hi, L04_hi, L05_hi =
				bit32_bxor(L01_hi, bit32_band(-1 - L02_hi, L03_hi)),
				bit32_bxor(L02_hi, bit32_band(-1 - L03_hi, L04_hi)),
				bit32_bxor(L03_hi, bit32_band(-1 - L04_hi, L05_hi)),
				bit32_bxor(L04_hi, bit32_band(-1 - L05_hi, L01_hi)),
				bit32_bxor(L05_hi, bit32_band(-1 - L01_hi, L02_hi))
			L06_lo, L07_lo, L08_lo, L09_lo, L10_lo =
				bit32_bxor(L09_lo, bit32_band(-1 - L10_lo, L06_lo)),
				bit32_bxor(L10_lo, bit32_band(-1 - L06_lo, L07_lo)),
				bit32_bxor(L06_lo, bit32_band(-1 - L07_lo, L08_lo)),
				bit32_bxor(L07_lo, bit32_band(-1 - L08_lo, L09_lo)),
				bit32_bxor(L08_lo, bit32_band(-1 - L09_lo, L10_lo))
			L06_hi, L07_hi, L08_hi, L09_hi, L10_hi =
				bit32_bxor(L09_hi, bit32_band(-1 - L10_hi, L06_hi)),
				bit32_bxor(L10_hi, bit32_band(-1 - L06_hi, L07_hi)),
				bit32_bxor(L06_hi, bit32_band(-1 - L07_hi, L08_hi)),
				bit32_bxor(L07_hi, bit32_band(-1 - L08_hi, L09_hi)),
				bit32_bxor(L08_hi, bit32_band(-1 - L09_hi, L10_hi))
			L11_lo, L12_lo, L13_lo, L14_lo, L15_lo =
				bit32_bxor(L12_lo, bit32_band(-1 - L13_lo, L14_lo)),
				bit32_bxor(L13_lo, bit32_band(-1 - L14_lo, L15_lo)),
				bit32_bxor(L14_lo, bit32_band(-1 - L15_lo, L11_lo)),
				bit32_bxor(L15_lo, bit32_band(-1 - L11_lo, L12_lo)),
				bit32_bxor(L11_lo, bit32_band(-1 - L12_lo, L13_lo))
			L11_hi, L12_hi, L13_hi, L14_hi, L15_hi =
				bit32_bxor(L12_hi, bit32_band(-1 - L13_hi, L14_hi)),
				bit32_bxor(L13_hi, bit32_band(-1 - L14_hi, L15_hi)),
				bit32_bxor(L14_hi, bit32_band(-1 - L15_hi, L11_hi)),
				bit32_bxor(L15_hi, bit32_band(-1 - L11_hi, L12_hi)),
				bit32_bxor(L11_hi, bit32_band(-1 - L12_hi, L13_hi))
			L16_lo, L17_lo, L18_lo, L19_lo, L20_lo =
				bit32_bxor(L20_lo, bit32_band(-1 - L16_lo, L17_lo)),
				bit32_bxor(L16_lo, bit32_band(-1 - L17_lo, L18_lo)),
				bit32_bxor(L17_lo, bit32_band(-1 - L18_lo, L19_lo)),
				bit32_bxor(L18_lo, bit32_band(-1 - L19_lo, L20_lo)),
				bit32_bxor(L19_lo, bit32_band(-1 - L20_lo, L16_lo))
			L16_hi, L17_hi, L18_hi, L19_hi, L20_hi =
				bit32_bxor(L20_hi, bit32_band(-1 - L16_hi, L17_hi)),
				bit32_bxor(L16_hi, bit32_band(-1 - L17_hi, L18_hi)),
				bit32_bxor(L17_hi, bit32_band(-1 - L18_hi, L19_hi)),
				bit32_bxor(L18_hi, bit32_band(-1 - L19_hi, L20_hi)),
				bit32_bxor(L19_hi, bit32_band(-1 - L20_hi, L16_hi))
			L21_lo, L22_lo, L23_lo, L24_lo, L25_lo =
				bit32_bxor(L23_lo, bit32_band(-1 - L24_lo, L25_lo)),
				bit32_bxor(L24_lo, bit32_band(-1 - L25_lo, L21_lo)),
				bit32_bxor(L25_lo, bit32_band(-1 - L21_lo, L22_lo)),
				bit32_bxor(L21_lo, bit32_band(-1 - L22_lo, L23_lo)),
				bit32_bxor(L22_lo, bit32_band(-1 - L23_lo, L24_lo))
			L21_hi, L22_hi, L23_hi, L24_hi, L25_hi =
				bit32_bxor(L23_hi, bit32_band(-1 - L24_hi, L25_hi)),
				bit32_bxor(L24_hi, bit32_band(-1 - L25_hi, L21_hi)),
				bit32_bxor(L25_hi, bit32_band(-1 - L21_hi, L22_hi)),
				bit32_bxor(L21_hi, bit32_band(-1 - L22_hi, L23_hi)),
				bit32_bxor(L22_hi, bit32_band(-1 - L23_hi, L24_hi))

			-- Mix the round constant into lane 1.
			L01_lo = bit32_bxor(L01_lo, RC_lo[round_idx])
			L01_hi = L01_hi + RC_hi[round_idx] -- RC_hi[] is either 0 or 0x80000000, so we could use fast addition instead of slow XOR
		end

		-- Write the permuted state back into the caller's tables.
		lanes_lo[1] = L01_lo
		lanes_hi[1] = L01_hi
		lanes_lo[2] = L02_lo
		lanes_hi[2] = L02_hi
		lanes_lo[3] = L03_lo
		lanes_hi[3] = L03_hi
		lanes_lo[4] = L04_lo
		lanes_hi[4] = L04_hi
		lanes_lo[5] = L05_lo
		lanes_hi[5] = L05_hi
		lanes_lo[6] = L06_lo
		lanes_hi[6] = L06_hi
		lanes_lo[7] = L07_lo
		lanes_hi[7] = L07_hi
		lanes_lo[8] = L08_lo
		lanes_hi[8] = L08_hi
		lanes_lo[9] = L09_lo
		lanes_hi[9] = L09_hi
		lanes_lo[10] = L10_lo
		lanes_hi[10] = L10_hi
		lanes_lo[11] = L11_lo
		lanes_hi[11] = L11_hi
		lanes_lo[12] = L12_lo
		lanes_hi[12] = L12_hi
		lanes_lo[13] = L13_lo
		lanes_hi[13] = L13_hi
		lanes_lo[14] = L14_lo
		lanes_hi[14] = L14_hi
		lanes_lo[15] = L15_lo
		lanes_hi[15] = L15_hi
		lanes_lo[16] = L16_lo
		lanes_hi[16] = L16_hi
		lanes_lo[17] = L17_lo
		lanes_hi[17] = L17_hi
		lanes_lo[18] = L18_lo
		lanes_hi[18] = L18_hi
		lanes_lo[19] = L19_lo
		lanes_hi[19] = L19_hi
		lanes_lo[20] = L20_lo
		lanes_hi[20] = L20_hi
		lanes_lo[21] = L21_lo
		lanes_hi[21] = L21_hi
		lanes_lo[22] = L22_lo
		lanes_hi[22] = L22_hi
		lanes_lo[23] = L23_lo
		lanes_hi[23] = L23_hi
		lanes_lo[24] = L24_lo
		lanes_hi[24] = L24_hi
		lanes_lo[25] = L25_lo
		lanes_hi[25] = L25_hi
	end
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- MAGIC NUMBERS CALCULATOR
|
|
--------------------------------------------------------------------------------
|
|
-- Q:
|
|
-- Is 53-bit "double" math enough to calculate square roots and cube roots of primes with 64 correct bits after decimal point?
|
|
-- A:
|
|
-- Yes, 53-bit "double" arithmetic is enough.
|
|
-- We could obtain first 40 bits by direct calculation of p^(1/3) and next 40 bits by one step of Newton's method.
|
|
do
	-- Long-integer multiplication used to refine the cube/square roots below.
	--   src1, src2    - long integers (arrays of digits in base TWO_POW_24, least significant digit first)
	--   factor        - small integer multiplier (kept as a "double": an "int32" product would overflow)
	--   result_length - number of base-TWO_POW_24 digits to produce
	-- Returns the long integer (src1 * src2 * factor) and its floating point approximation.
	local function mul(src1, src2, factor, result_length)
		local digits = table.create(result_length)
		local acc, approx, scale = 0, 0, 1
		local len1, len2 = #src1, #src2

		for pos = 1, result_length do
			-- accumulate every digit pair whose product lands on digit position "pos"
			local first, last = math.max(1, pos + 1 - len2), math.min(pos, len1)
			for k = first, last do
				acc = acc + factor * src1[k] * src2[pos + 1 - k]
			end

			local low = acc % TWO_POW_24
			digits[pos] = math.floor(low)
			acc = (acc - low) / TWO_POW_24
			approx = approx + low * scale
			scale = scale * TWO_POW_24
		end

		return digits, approx
	end

	-- "step" walks the candidates 2, 3, 5, 7, 11, 13, 17, 19, 23, 25, ... (2, 3 and numbers of the form 6k +/- 1)
	local step, one = { 4, 1, 2, -2, 2 }, { 1 }
	local found, candidate = 0, 4
	-- tables currently receiving the square-root based initial values
	local sqrt_hi, sqrt_lo = sha2_H_hi, sha2_H_lo

	repeat
		candidate = candidate + step[candidate % 6]
		local divisor = 1
		repeat
			divisor = divisor + step[divisor % 6]
			if divisor * divisor > candidate then
				-- no divisor up to sqrt(candidate): the next prime number is found
				local cube_root = candidate ^ (1 / 3)
				local scaled = cube_root * TWO_POW_40
				local cube_digits = mul(table.create(1, math.floor(scaled)), one, 1, 2)
				-- delta approximates -(cube_digits ^ 3); it feeds the one-step correction of the low word
				local _, cube_delta = mul(cube_digits, mul(cube_digits, cube_digits, 1, 4), -1, 4)
				local k_hi = cube_digits[2] % 65536 * 65536 + math.floor(cube_digits[1] / 256)
				local k_lo = cube_digits[1] % 256 * 16777216
					+ math.floor(cube_delta * (TWO_POW_NEG_56 / 3) * cube_root / candidate)

				if found < 16 then
					-- the first 16 primes additionally provide square-root based values
					local square_root = math.sqrt(candidate)
					local scaled_sqrt = square_root * TWO_POW_40
					local sqrt_digits = mul(table.create(1, math.floor(scaled_sqrt)), one, 1, 2)
					local _, sqrt_delta = mul(sqrt_digits, sqrt_digits, -1, 2)
					local h_hi = sqrt_digits[2] % 65536 * 65536 + math.floor(sqrt_digits[1] / 256)
					local h_lo = sqrt_digits[1] % 256 * 16777216
						+ math.floor(sqrt_delta * TWO_POW_NEG_17 / square_root)
					local slot = found % 8 + 1

					sha2_H_ext256[224][slot] = h_lo
					sqrt_hi[slot], sqrt_lo[slot] = h_hi, h_lo + h_hi * hi_factor
					if slot > 7 then
						-- 8 slots are filled: the next 8 primes go to the 384-bit tables
						sqrt_hi, sqrt_lo = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384]
					end
				end

				found = found + 1
				sha2_K_hi[found], sha2_K_lo[found] = k_hi, k_lo % K_lo_modulo + k_hi * hi_factor
				break
			end
		until candidate % divisor == 0
	until found > 79
end
|
|
|
|
-- Calculating IVs for SHA512/224 and SHA512/256
|
|
-- Derives the initial values for SHA-512/224 and SHA-512/256:
-- every word of the base table is XORed with A5A5... and the string "SHA-512/<width>",
-- laid out as one complete 128-byte padded block, is run through sha512_feed_128.
for width = 224, 256, 32 do
	local iv_lo, iv_hi = {}, nil

	if XOR64A5 then
		-- single-table representation: XOR64A5 handles a whole word at once
		for word = 1, 8 do
			iv_lo[word] = XOR64A5(sha2_H_lo[word])
		end
	else
		-- two-table representation: XOR the low and the high 32-bit halves separately
		iv_hi = {}
		for word = 1, 8 do
			iv_lo[word] = bit32_bxor(sha2_H_lo[word], 0xA5A5A5A5) % 4294967296
			iv_hi[word] = bit32_bxor(sha2_H_hi[word], 0xA5A5A5A5) % 4294967296
		end
	end

	-- 11 bytes of text + "\128" + 115 zero bytes + "\88" (88 = 11 * 8, the bit length of the text) = 128 bytes
	local padded_name = "SHA-512/" .. tostring(width) .. "\128" .. string.rep("\0", 115) .. "\88"
	sha512_feed_128(iv_lo, iv_hi, padded_name, 0, 128)

	sha2_H_ext512_lo[width] = iv_lo
	sha2_H_ext512_hi[width] = iv_hi
end
|
|
|
|
-- Constants for MD5
|
|
do
	-- Fills md5_K[1..64] with floor(abs(sin(n)) * 2^32), computed as two 16-bit halves:
	-- the formula math.floor(abs(sin(n)) * TWO_POW_32) can't be used directly because its result
	-- may be beyond integer range on Lua built with 32-bit integers
	for n = 1, 64 do
		local upper, fraction = math.modf(math.abs(math.sin(n)) * TWO_POW_16)
		local lower = math.floor(fraction * TWO_POW_16)
		md5_K[n] = upper * 65536 + lower
	end
end
|
|
|
|
-- Constants for SHA3
|
|
do
	-- State of the shift register that generates the round-constant bits
	local register = 29

	-- Returns the lowest bit of the register and advances the register by one step
	local function next_bit()
		local bit = register % 2
		register = bit32_bxor((register - bit) / 2, 142 * bit)
		return bit
	end

	for round = 1, 24 do
		-- "weight" takes the values 2^0, 2^1, 2^3, 2^7, 2^15, 2^31
		local low, weight = 0, nil
		for _ = 1, 6 do
			if weight then
				weight = weight * weight * 2
			else
				weight = 1
			end

			low = low + next_bit() * weight
		end

		-- at this point weight == 2^31, so the high word is either 0 or 0x80000000
		local high = next_bit() * weight
		sha3_RC_hi[round], sha3_RC_lo[round] = high, low + high * hi_factor_keccak
	end
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- MAIN FUNCTIONS
|
|
--------------------------------------------------------------------------------
|
|
-- Shared implementation of the SHA-2 functions with 32-bit words.
--   width   - digest width in bits; selects the initial values from sha2_H_ext256
--   message - optional string
-- With a message: returns its digest as a lowercase hex string.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk as a
-- single argument and finally invoke it without an argument to get the digest.
local function sha256ext(width, message)
	-- Create an instance (private objects for current calculation)
	local initial = sha2_H_ext256[width] -- # == 8
	local H = table.create(8)
	for word = 1, 8 do
		H[word] = initial[word]
	end

	-- "tail" holds the bytes not yet processed; it becomes nil once the digest is produced
	local length, tail = 0, ""

	local function partial(message_part)
		if not message_part then
			if tail then
				-- First call without an argument: pad, process the final block(s), format the digest
				local final_blocks = table.create(10) --{tail, "\128", string.rep("\0", (-9 - length) % 64 + 1)}
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-9 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move decimal point to the left
				local fraction = length * (8 / TWO56_POW_7)
				for slot = 4, 10 do
					-- every step shifts the next (most significant first) byte into the integer part
					fraction = fraction % 1 * 256
					final_blocks[slot] = string.char(math.floor(fraction))
				end

				local padded = table.concat(final_blocks)
				sha256_feed_64(H, padded, 0, #padded)

				local max_reg = width / 32
				for word = 1, max_reg do
					H[word] = string.format("%08x", H[word] % 4294967296)
				end

				H = table.concat(H, "", 1, max_reg)
			end

			return H
		end

		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local partLength = #message_part
		local tailLength = #tail
		length = length + partLength

		-- Complete the pending partial block first, if this chunk is long enough for that
		local offs = 0
		if tail ~= "" and tailLength + partLength >= 64 then
			offs = 64 - tailLength
			sha256_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- Process all whole blocks of the chunk; keep the remainder for the next call
		local size = partLength - offs
		local size_tail = size % 64
		sha256_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA256 digest of a message
	return partial(message)()
end
|
|
|
|
-- Shared implementation of the SHA-2 functions with 64-bit words.
--   width   - digest width in bits; selects the initial values from sha2_H_ext512_lo/_hi
--   message - optional string
-- With a message: returns its digest as a lowercase hex string of width / 4 characters.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk as a
-- single argument and finally invoke it without an argument to get the digest.
local function sha512ext(width, message)
	-- Create an instance (private objects for current calculation)
	local length, tail = 0, ""
	local H_lo = table.pack(table.unpack(sha2_H_ext512_lo[width]))
	-- the separate table of high halves exists only when HEX64 is not available
	local H_hi = not HEX64 and table.pack(table.unpack(sha2_H_ext512_hi[width]))

	local function partial(message_part)
		if not message_part then
			if tail then
				-- First call without an argument: pad, process the final block(s), format the digest.
				-- 10 slots are written below (tail, "\128", zeros, 7 length bytes), so reserve all of them
				-- (same as sha256ext and sha1 do).
				local final_blocks = table.create(10) --{tail, "\128", string.rep("\0", (-17-length) % 128 + 9)}
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-17 - length) % 128 + 9)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-17 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move floating point to the left
				local fraction = length * (8 / TWO56_POW_7)
				for slot = 4, 10 do
					-- every step shifts the next (most significant first) byte into the integer part
					fraction = fraction % 1 * 256
					final_blocks[slot] = string.char(math.floor(fraction))
				end

				local padded = table.concat(final_blocks)
				sha512_feed_128(H_lo, H_hi, padded, 0, #padded)

				local max_reg = math.ceil(width / 64)
				if HEX64 then
					for word = 1, max_reg do
						H_lo[word] = HEX64(H_lo[word])
					end
				else
					for word = 1, max_reg do
						H_lo[word] = string.format("%08x", H_hi[word] % 4294967296)
							.. string.format("%08x", H_lo[word] % 4294967296)
					end

					H_hi = nil
				end

				-- widths that are not a multiple of 64 (e.g. 224) need the last word truncated
				H_lo = string.sub(table.concat(H_lo, "", 1, max_reg), 1, width / 4)
			end

			return H_lo
		end

		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local partLength = #message_part
		local tailLength = #tail
		length = length + partLength

		-- Complete the pending partial block first, if this chunk is long enough for that
		local offs = 0
		if tail ~= "" and tailLength + partLength >= 128 then
			offs = 128 - tailLength
			sha512_feed_128(H_lo, H_hi, tail .. string.sub(message_part, 1, offs), 0, 128)
			tail = ""
		end

		-- Process all whole blocks of the chunk; keep the remainder for the next call
		local size = partLength - offs
		local size_tail = size % 128
		sha512_feed_128(H_lo, H_hi, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA512 digest of a message
	return partial(message)()
end
|
|
|
|
-- MD5.
--   message - optional string
-- With a message: returns its digest as a lowercase hex string.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk as a
-- single argument and finally invoke it without an argument to get the digest.
local function md5(message)
	-- Create an instance (private objects for current calculation)
	local H, length, tail = table.create(4), 0, ""
	for word = 1, 4 do
		H[word] = md5_sha1_H[word]
	end

	local function partial(message_part)
		if not message_part then
			if tail then
				-- First call without an argument: pad, process the final block(s), format the digest.
				-- 11 slots are written below (tail, "\128", zeros, 8 length bytes), so reserve all of them.
				local final_blocks = table.create(11) --{tail, "\128", string.rep("\0", (-9 - length) % 64)}
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64)
				tail = nil

				-- convert "byte-counter" to "bit-counter" and emit it as 8 bytes, least significant first
				local bits = length * 8
				for slot = 4, 11 do
					local low_byte = bits % 256
					final_blocks[slot] = string.char(low_byte)
					bits = (bits - low_byte) / 256
				end

				local padded = table.concat(final_blocks)
				md5_feed_64(H, padded, 0, #padded)

				for word = 1, 4 do
					H[word] = string.format("%08x", H[word] % 4294967296)
				end

				-- reverse the byte order inside every 32-bit word
				H = string.gsub(table.concat(H), "(..)(..)(..)(..)", "%4%3%2%1")
			end

			return H
		end

		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local partLength = #message_part
		local tailLength = #tail
		length = length + partLength

		-- Complete the pending partial block first, if this chunk is long enough for that
		local offs = 0
		if tail ~= "" and tailLength + partLength >= 64 then
			offs = 64 - tailLength
			md5_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- Process all whole blocks of the chunk; keep the remainder for the next call
		local size = partLength - offs
		local size_tail = size % 64
		md5_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the MD5 digest of a message
	return partial(message)()
end
|
|
|
|
-- SHA-1.
--   message - optional string
-- With a message: returns its digest as a lowercase hex string.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk as a
-- single argument and finally invoke it without an argument to get the digest.
local function sha1(message)
	-- Create an instance (private objects for current calculation)
	local H, length, tail = table.pack(table.unpack(md5_sha1_H)), 0, ""

	local function partial(message_part)
		if not message_part then
			if tail then
				-- First call without an argument: pad, process the final block(s), format the digest
				local final_blocks = table.create(10) --{tail, "\128", string.rep("\0", (-9 - length) % 64 + 1)}
				final_blocks[1] = tail
				final_blocks[2] = "\128"
				final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1)
				tail = nil

				-- Assuming user data length is shorter than (TWO_POW_53)-9 bytes
				-- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes
				-- convert "byte-counter" to "bit-counter" and move decimal point to the left
				local fraction = length * (8 / TWO56_POW_7)
				for slot = 4, 10 do
					-- every step shifts the next (most significant first) byte into the integer part
					fraction = fraction % 1 * 256
					final_blocks[slot] = string.char(math.floor(fraction))
				end

				local padded = table.concat(final_blocks)
				sha1_feed_64(H, padded, 0, #padded)

				for word = 1, 5 do
					H[word] = string.format("%08x", H[word] % 4294967296)
				end

				H = table.concat(H)
			end

			return H
		end

		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local partLength = #message_part
		local tailLength = #tail
		length = length + partLength

		-- Complete the pending partial block first, if this chunk is long enough for that
		local offs = 0
		if tail ~= "" and tailLength + partLength >= 64 then
			offs = 64 - tailLength
			sha1_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64)
			tail = ""
		end

		-- Process all whole blocks of the chunk; keep the remainder for the next call
		local size = partLength - offs
		local size_tail = size % 64
		sha1_feed_64(H, message_part, offs, size - size_tail)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA-1 digest of a message
	return partial(message)()
end
|
|
|
|
-- Shared implementation of SHA3-* and SHAKE*.
--   block_size_in_bytes  - size of one absorbed block; a multiple of 8
--   digest_size_in_bytes - number of digest bytes to return; a negative value makes the final call
--                          return a function(bytes_needed) that yields the digest piece by piece
--   is_SHAKE             - selects the SHAKE padding bits instead of the usual SHA3 ones
--   message              - optional string
-- With a message: returns the digest (hex string) or the digest-reading function.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk as a
-- single argument and finally invoke it without an argument to get the result.
local function keccak(block_size_in_bytes, digest_size_in_bytes, is_SHAKE, message)
	if type(digest_size_in_bytes) ~= "number" then
		-- arguments in SHAKE are swapped:
		--    NIST FIPS 202 defines SHAKE(message,num_bits)
		--    this module   defines SHAKE(num_bytes,message)
		-- it's easy to forget about this swap, hence the check
		error("Argument 'digest_size_in_bytes' must be a number", 2)
	end

	-- Create an instance (private objects for current calculation)
	-- "tail" holds the bytes not yet absorbed; it becomes nil once the result is produced
	local tail, lanes_lo, lanes_hi = "", table.create(25, 0), hi_factor_keccak == 0 and table.create(25, 0)
	local result

	--~     pad the input N using the pad function, yielding a padded bit string P with a length divisible by r (such that n = len(P)/r is integer),
	--~     break P into n consecutive r-bit pieces P0, ..., Pn-1 (last is zero-padded)
	--~     initialize the state S to a string of b 0 bits.
	--~     absorb the input into the state: For each block Pi,
	--~         extend Pi at the end by a string of c 0 bits, yielding one of length b,
	--~         XOR that with S and
	--~         apply the block permutation f to the result, yielding a new state S
	--~     initialize Z to be the empty string
	--~     while the length of Z is less than d:
	--~         append the first r bits of S to Z
	--~         if Z is still less than d bits long, apply f to S, yielding a new state S.
	--~     truncate Z to d bits

	-- Pads and absorbs the pending tail, then builds the digest readers and stores the result
	local function finish()
		-- append the following bits to the message: for usual SHA3: 011(0*)1, for SHAKE: 11111(0*)1
		local gap_start = is_SHAKE and 31 or 6
		local padding
		if #tail + 1 == block_size_in_bytes then
			-- only one byte is free: the first and the last padding bits share it
			padding = string.char(gap_start + 128)
		else
			padding = string.char(gap_start) .. string.rep("\0", (-2 - #tail) % block_size_in_bytes) .. "\128"
		end

		tail = tail .. padding
		keccak_feed(lanes_lo, lanes_hi, tail, 0, #tail, block_size_in_bytes)
		tail = nil

		local lanes_used = 0
		local total_lanes = math.floor(block_size_in_bytes / 8)
		local qwords = {}

		local function get_next_qwords_of_digest(qwords_qty)
			-- returns not more than 'qwords_qty' qwords ('qwords_qty' might be non-integer)
			-- doesn't go across keccak-buffer boundary
			-- block_size_in_bytes is a multiple of 8, so, keccak-buffer contains integer number of qwords
			if lanes_used >= total_lanes then
				-- the current buffer is exhausted: feed 8 zero bytes to obtain the next one
				keccak_feed(lanes_lo, lanes_hi, "\0\0\0\0\0\0\0\0", 0, 8, 8)
				lanes_used = 0
			end

			qwords_qty = math.floor(math.min(qwords_qty, total_lanes - lanes_used))
			if hi_factor_keccak == 0 then
				-- lanes are split into two 32-bit halves
				for j = 1, qwords_qty do
					local lane = lanes_used + j
					qwords[j] = string.format("%08x", lanes_hi[lane] % 4294967296)
						.. string.format("%08x", lanes_lo[lane] % 4294967296)
				end
			else
				for j = 1, qwords_qty do
					qwords[j] = HEX64(lanes_lo[lanes_used + j - 1 + lanes_index_base])
				end
			end

			lanes_used = lanes_used + qwords_qty

			-- reverse the byte order inside every qword
			local hex = string.gsub(
				table.concat(qwords, "", 1, qwords_qty),
				"(..)(..)(..)(..)(..)(..)(..)(..)",
				"%8%7%6%5%4%3%2%1"
			)
			return hex, qwords_qty * 8
		end

		local parts = {} -- digest parts
		-- hex digits already generated but not yet handed out, and their size in bytes
		local last_part, last_part_size = "", 0

		local function get_next_part_of_digest(bytes_needed)
			-- returns 'bytes_needed' bytes, for arbitrary integer 'bytes_needed'
			bytes_needed = bytes_needed or 1
			if bytes_needed <= last_part_size then
				-- the leftover of the previous qword is enough
				last_part_size = last_part_size - bytes_needed
				local part_size_in_nibbles = bytes_needed * 2
				local head = string.sub(last_part, 1, part_size_in_nibbles)
				last_part = string.sub(last_part, part_size_in_nibbles + 1)
				return head
			end

			local parts_qty = 0
			if last_part_size > 0 then
				parts_qty = 1
				parts[parts_qty] = last_part
				bytes_needed = bytes_needed - last_part_size
			end

			-- repeats until the length is enough
			while bytes_needed >= 8 do
				local next_part, next_part_size = get_next_qwords_of_digest(bytes_needed / 8)
				parts_qty = parts_qty + 1
				parts[parts_qty] = next_part
				bytes_needed = bytes_needed - next_part_size
			end

			if bytes_needed > 0 then
				-- take one more qword, hand out its beginning and keep the rest for the next call
				last_part, last_part_size = get_next_qwords_of_digest(1)
				parts_qty = parts_qty + 1
				parts[parts_qty] = get_next_part_of_digest(bytes_needed)
			else
				last_part, last_part_size = "", 0
			end

			return table.concat(parts, "", 1, parts_qty)
		end

		if digest_size_in_bytes < 0 then
			result = get_next_part_of_digest
		else
			result = get_next_part_of_digest(digest_size_in_bytes)
		end
	end

	local function partial(message_part)
		if not message_part then
			if tail then
				finish()
			end

			return result
		end

		if not tail then
			error("Adding more chunks is not allowed after receiving the result", 2)
		end

		local partLength = #message_part
		local tailLength = #tail

		-- Complete the pending partial block first, if this chunk is long enough for that
		local offs = 0
		if tail ~= "" and tailLength + partLength >= block_size_in_bytes then
			offs = block_size_in_bytes - tailLength
			keccak_feed(
				lanes_lo,
				lanes_hi,
				tail .. string.sub(message_part, 1, offs),
				0,
				block_size_in_bytes,
				block_size_in_bytes
			)
			tail = ""
		end

		-- Absorb all whole blocks of the chunk; keep the remainder for the next call
		local size = partLength - offs
		local size_tail = size % block_size_in_bytes
		keccak_feed(lanes_lo, lanes_hi, message_part, offs, size - size_tail, block_size_in_bytes)
		tail = tail .. string.sub(message_part, partLength + 1 - size_tail)
		return partial
	end

	if not message then
		-- Return function for chunk-by-chunk loading
		return partial
	end

	-- Actually perform calculations and return the SHA3 digest of a message
	return partial(message)()
end
|
|
|
|
-- Converts a string of hexadecimal digits (normally one pair) to the character with that code.
local function HexToBinFunction(hh)
	local code = tonumber(hh, 16)
	return string.char(code)
end
|
|
|
|
-- Converts hexadecimal representation to binary string: every pair of hex digits becomes one byte.
-- Characters that are not part of such a pair are left untouched.
local function hex2bin(hex_string)
	-- keep only the first result of gsub (the second one is the number of replacements)
	local binary = string.gsub(hex_string, "%x%x", HexToBinFunction)
	return binary
end
|
|
|
|
-- Two-way base64 lookup table: symbol -> 6-bit code and 6-bit code -> symbol.
-- Decoding additionally accepts the URL-safe symbols "-" and "_" and treats "." as padding;
-- the code -1 stands for padding.
local base64_symbols = {
	-- code 62
	["+"] = 62,
	["-"] = 62,
	[62] = "+",
	-- code 63
	["/"] = 63,
	["_"] = 63,
	[63] = "/",
	-- padding
	["="] = -1,
	["."] = -1,
	[-1] = "=",
}

-- Codes 0..61 are assigned to "A".."Z", "a".."z", "0".."9" in this order
local symbol_index = 0
for _, range in ipairs({ "AZ", "az", "09" }) do
	local first, last = string.byte(range, 1, 2)
	for ascii = first, last do
		local symbol = string.char(ascii)
		base64_symbols[symbol] = symbol_index
		base64_symbols[symbol_index] = symbol
		symbol_index = symbol_index + 1
	end
end
|
|
|
|
-- Converts binary string to base64 representation (standard alphabet, "=" padding).
local function bin2base64(binary_string)
	local total = #binary_string
	local groups = table.create(math.ceil(total / 3))
	local groups_qty = 0

	for pos = 1, total, 3 do
		-- A "\0" sentinel is appended to the (up to) 3 input bytes: the number of non-nil values
		-- tells how many real bytes there are, and the sentinel doubles as the zero filler.
		local b1, b2, b3, b4 = string.byte(string.sub(binary_string, pos, pos + 2) .. "\0", 1, -1)

		local third_code, fourth_code = -1, -1 -- -1 is the padding symbol
		if b3 then
			third_code = b2 % 16 * 4 + math.floor(b3 / 64)
		end

		if b4 then
			fourth_code = b3 % 64
		end

		groups_qty = groups_qty + 1
		groups[groups_qty] = base64_symbols[math.floor(b1 / 4)]
			.. base64_symbols[b1 % 4 * 16 + math.floor(b2 / 16)]
			.. base64_symbols[third_code]
			.. base64_symbols[fourth_code]
	end

	return table.concat(groups)
end
|
|
|
|
-- Converts base64 representation to binary string.
-- Whitespace is ignored. Accepts every symbol known to base64_symbols (standard and URL-safe
-- alphabets, "=" or "." as padding).
--   * A symbol outside the alphabet raises a descriptive error (it used to fail with an obscure
--     "attempt to compare nil" error).
--   * An unpadded final group of 2 or 3 symbols is decoded to 1 or 2 bytes (it used to be
--     silently dropped); a single dangling symbol carries no complete byte and yields nothing.
--   * Padding is counted per group of 4 symbols, so concatenated padded payloads decode correctly.
local function base642bin(base64_string)
	-- keep only the first result of gsub (the second one is the number of replacements)
	local stripped = string.gsub(base64_string, "%s+", "")
	local decoded, decoded_qty = {}, 0
	-- 6-bit codes of the group being collected, how many are filled, how many of them are padding
	local sextets = { 0, 0, 0, 0 }
	local filled, padding = 0, 0

	-- Turns the collected group into up to 3 bytes and starts a new group
	local function flush()
		local byte1 = sextets[1] * 4 + math.floor(sextets[2] / 16)
		local byte2 = (sextets[2] % 16) * 16 + math.floor(sextets[3] / 4)
		local byte3 = (sextets[3] % 4) * 64 + sextets[4]
		decoded_qty = decoded_qty + 1
		-- every padding symbol removes one byte; clamp so that a negative end index is never passed
		decoded[decoded_qty] = string.sub(string.char(byte1, byte2, byte3), 1, math.max(0, 3 - padding))
		filled, padding = 0, 0
	end

	for pos = 1, #stripped do
		local ch = string.sub(stripped, pos, pos)
		local code = base64_symbols[ch]
		if code == nil then
			-- the position refers to the input with whitespace removed
			error(string.format("Invalid base64 character %q at position %d", ch, pos), 2)
		end

		if code < 0 then
			padding = padding + 1
			code = 0
		end

		filled = filled + 1
		sextets[filled] = code
		if filled == 4 then
			flush()
		end
	end

	if filled > 0 then
		-- unpadded input: treat the missing symbols as padding
		for slot = filled + 1, 4 do
			sextets[slot] = 0
			padding = padding + 1
		end

		flush()
	end

	return table.concat(decoded)
end
|
|
|
|
local block_size_for_HMAC -- this table will be initialized at the end of the module
--local function pad_and_xor(str, result_length, byte_for_xor)
--	return string.gsub(str, ".", function(c)
--		return string.char(bit32_bxor(string.byte(c), byte_for_xor))
--	end) .. string.rep(string.char(byte_for_xor), result_length - #str)
--end

-- For the sake of speed of converting hexes to strings, there's a map of the conversions here:
-- every two-digit lowercase hex string ("00".."ff") maps to the character with that code
local BinaryStringMap = {}
for byte = 0, 255 do
	local hex = string.format("%02x", byte)
	BinaryStringMap[hex] = string.char(byte)
end
|
|
|
|
-- Update 02.14.20 - added AsBinary for easy GameAnalytics replacement.
|
|
-- HMAC over any hash function registered in block_size_for_HMAC.
--   hash_func - one of the hash functions of this module (looked up in block_size_for_HMAC)
--   key       - string
--   message   - optional string
--   AsBinary  - when truthy and a message is given, the result is a binary string instead of hex
-- With a message: returns the HMAC.
-- Without a message: returns a function for chunk-by-chunk loading; feed it every chunk of the
-- message as a single argument and finally invoke it without an argument to get the HMAC (hex).
-- Raises "Unknown hash function" when hash_func is not registered.
local function hmac(hash_func, key, message, AsBinary)
	-- Create an instance (private objects for current calculation)
	local block_size = block_size_for_HMAC[hash_func]
	if not block_size then
		error("Unknown hash function", 2)
	end

	local KeyLength = #key
	if KeyLength > block_size then
		-- a key longer than one block is replaced by its (binary) digest
		key = string.gsub(hash_func(key), "%x%x", HexToBinFunction)
		KeyLength = #key
	end

	local pad_length = block_size - KeyLength

	-- Returns the key with every byte XORed with "mask"
	local function xor_key(mask)
		local masked = string.gsub(key, ".", function(c)
			return string.char(bit32_bxor(string.byte(c), mask))
		end)
		return masked
	end

	-- Inner hash: starts with the key XORed with 0x36 and padded to a whole block
	local inner = hash_func()
	local append = inner(xor_key(0x36) .. string.rep("6", pad_length)) -- 6 = string.char(0x36)

	local result

	local function partial(message_part)
		if message_part then
			if result then
				error("Adding more chunks is not allowed after receiving the result", 2)
			end

			append(message_part)
			return partial
		end

		if not result then
			-- Outer hash: key XORed with 0x5c, padded to a whole block, followed by the binary inner digest
			result = hash_func(
				xor_key(0x5c)
					.. string.rep("\\", pad_length) -- \ = string.char(0x5c)
					.. (string.gsub(append(), "%x%x", HexToBinFunction))
			)
		end

		return result
	end

	if not message then
		-- Return function for chunk-by-chunk loading of a message
		return partial
	end

	-- Actually perform calculations and return the HMAC of a message
	local FinalMessage = partial(message)()
	if AsBinary then
		return (string.gsub(FinalMessage, "%x%x", BinaryStringMap))
	end

	return FinalMessage
end
|
|
|
|
-- Public interface of the module.
-- Every hash function takes an optional message: with a message it returns the digest,
-- without one it returns a function for chunk-by-chunk loading.
local sha = {
	md5 = md5,
	sha1 = sha1,

	-- misc utilities:
	hmac = hmac, -- HMAC(hash_func, key, message) is applicable to any hash function from this module except SHAKE*
	hex_to_bin = hex2bin, -- converts hexadecimal representation to binary string
	base64_to_bin = base642bin, -- converts base64 representation to binary string
	bin_to_base64 = bin2base64, -- converts binary string to base64 representation
	base64_encode = Base64.Encode,
	base64_decode = Base64.Decode,
}

-- SHA2 hash functions:
function sha.sha224(message)
	return sha256ext(224, message)
end

function sha.sha256(message)
	return sha256ext(256, message)
end

function sha.sha512_224(message)
	return sha512ext(224, message)
end

function sha.sha512_256(message)
	return sha512ext(256, message)
end

function sha.sha384(message)
	return sha512ext(384, message)
end

function sha.sha512(message)
	return sha512ext(512, message)
end

-- SHA3 hash functions:
-- the first argument of keccak is the block size in bytes, the second one is the digest size in bytes
function sha.sha3_224(message)
	return keccak((1600 - 2 * 224) / 8, 224 / 8, false, message)
end

function sha.sha3_256(message)
	return keccak((1600 - 2 * 256) / 8, 256 / 8, false, message)
end

function sha.sha3_384(message)
	return keccak((1600 - 2 * 384) / 8, 384 / 8, false, message)
end

function sha.sha3_512(message)
	return keccak((1600 - 2 * 512) / 8, 512 / 8, false, message)
end

function sha.shake128(message, digest_size_in_bytes)
	return keccak((1600 - 2 * 128) / 8, digest_size_in_bytes, true, message)
end

function sha.shake256(message, digest_size_in_bytes)
	return keccak((1600 - 2 * 256) / 8, digest_size_in_bytes, true, message)
end
|
|
|
|
-- Block size in bytes of every hash function usable with hmac(), keyed by the function itself
block_size_for_HMAC = {}

for _, hash_func in ipairs({ sha.md5, sha.sha1, sha.sha224, sha.sha256 }) do
	block_size_for_HMAC[hash_func] = 64
end

for _, hash_func in ipairs({ sha.sha512_224, sha.sha512_256, sha.sha384, sha.sha512 }) do
	block_size_for_HMAC[hash_func] = 128
end

-- SHA3: same block-size expressions as the ones passed to keccak() by the sha3_* functions
block_size_for_HMAC[sha.sha3_224] = (1600 - 2 * 224) / 8
block_size_for_HMAC[sha.sha3_256] = (1600 - 2 * 256) / 8
block_size_for_HMAC[sha.sha3_384] = (1600 - 2 * 384) / 8
block_size_for_HMAC[sha.sha3_512] = (1600 - 2 * 512) / 8

return sha
|