--[=[------------------------------------------------------------------------------------------------------------------------ -- HashLib by Egor Skriptunoff, boatbomber, and howmanysmall -------------------------------------------------------------------------------------------------------------------------- Module was originally written by Egor Skriptunoff and distributed under an MIT license. It can be found here: https://github.com/Egor-Skriptunoff/pure_lua_SHA/blob/master/sha2.lua That version was around 3000 lines long, and supported Lua versions 5.1, 5.2, 5.3, and 5.4, and LuaJIT. Although that is super cool, Roblox only uses Lua 5.1, so that was extreme overkill. I, boatbomber, worked to port it to Roblox in a way that doesn't overcomplicate it with support of unreachable cases. Then, howmanysmall did some final optimizations that really squeeze out all the performance possible. It's gotten stupid fast, thanks to her! After quite a bit of work and benchmarking, this is what we were left with. Enjoy! -------------------------------------------------------------------------------------------------------------------------- DESCRIPTION: This module contains functions to calculate SHA digest: MD5, SHA-1, SHA-224, SHA-256, SHA-512/224, SHA-512/256, SHA-384, SHA-512, SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256, HMAC Additionally, it has a few extra utility functions: hex_to_bin base64_to_bin bin_to_base64 Written in pure Lua. USAGE: Input data should be a string Result (SHA digest) is returned in hexadecimal representation as a string of lowercase hex digits. Simplest usage example: local HashLib = require(script.HashLib) local your_hash = HashLib.sha256("your string") API: HashLib.md5 HashLib.sha1 SHA2 hash functions: HashLib.sha224 HashLib.sha256 HashLib.sha512_224 HashLib.sha512_256 HashLib.sha384 HashLib.sha512 SHA3 hash functions: HashLib.sha3_224 HashLib.sha3_256 HashLib.sha3_384 HashLib.sha3_512 HashLib.shake128 HashLib.shake256 Misc utilities: HashLib.hmac (Applicable to any hash function from this module except SHAKE*) HashLib.hex_to_bin HashLib.base64_to_bin HashLib.bin_to_base64 --]=] --------------------------------------------------------------------------- local Base64 = require(script.Base64) -------------------------------------------------------------------------------- -- LOCALIZATION FOR VM OPTIMIZATIONS -------------------------------------------------------------------------------- local ipairs = ipairs -------------------------------------------------------------------------------- -- 32-BIT BITWISE FUNCTIONS -------------------------------------------------------------------------------- -- Only low 32 bits of function arguments matter, high bits are ignored -- The result of all functions (except HEX) is an integer inside "correct range": -- for "bit" library: (-TWO_POW_31)..(TWO_POW_31-1) -- for "bit32" library: 0..(TWO_POW_32-1) local bit32_band = bit32.band -- 2 arguments local bit32_bor = bit32.bor -- 2 arguments local bit32_bxor = bit32.bxor -- 2..5 arguments local bit32_lshift = bit32.lshift -- second argument is integer 0..31 local bit32_rshift = bit32.rshift -- second argument is integer 0..31 local bit32_lrotate = bit32.lrotate -- second argument is integer 0..31 local bit32_rrotate = bit32.rrotate -- second argument is integer 0..31 -------------------------------------------------------------------------------- -- CREATING OPTIMIZED INNER LOOP -------------------------------------------------------------------------------- -- Arrays of SHA2 "magic numbers" (in "INT64" and "FFI" branches "*_lo" arrays contain 64-bit values) local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi, sha3_RC_lo, sha3_RC_hi = {}, {}, {}, {}, {}, {} local sha2_H_ext256 = { [224] = {}, [256] = sha2_H_hi, } local sha2_H_ext512_lo, sha2_H_ext512_hi = { [384] = {}, [512] = sha2_H_lo, }, { [384] = {}, [512] = sha2_H_hi, } local md5_K, md5_sha1_H = {}, { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 } local md5_next_shift = { 0, 0, 0, 0, 0, 0, 0, 0, 28, 25, 26, 27, 0, 0, 10, 9, 11, 12, 0, 15, 16, 17, 18, 0, 20, 22, 23, 21, } local HEX64, XOR64A5, lanes_index_base -- defined only for branches that internally use 64-bit integers: "INT64" and "FFI" local common_W = {} -- temporary table shared between all calculations (to avoid creating new temporary table every time) local K_lo_modulo, hi_factor, hi_factor_keccak = 4294967296, 0, 0 local TWO_POW_NEG_56 = 2 ^ -56 local TWO_POW_NEG_17 = 2 ^ -17 local TWO_POW_2 = 2 ^ 2 local TWO_POW_3 = 2 ^ 3 local TWO_POW_4 = 2 ^ 4 local TWO_POW_5 = 2 ^ 5 local TWO_POW_6 = 2 ^ 6 local TWO_POW_7 = 2 ^ 7 local TWO_POW_8 = 2 ^ 8 local TWO_POW_9 = 2 ^ 9 local TWO_POW_10 = 2 ^ 10 local TWO_POW_11 = 2 ^ 11 local TWO_POW_12 = 2 ^ 12 local TWO_POW_13 = 2 ^ 13 local TWO_POW_14 = 2 ^ 14 local TWO_POW_15 = 2 ^ 15 local TWO_POW_16 = 2 ^ 16 local TWO_POW_17 = 2 ^ 17 local TWO_POW_18 = 2 ^ 18 local TWO_POW_19 = 2 ^ 19 local TWO_POW_20 = 2 ^ 20 local TWO_POW_21 = 2 ^ 21 local TWO_POW_22 = 2 ^ 22 local TWO_POW_23 = 2 ^ 23 local TWO_POW_24 = 2 ^ 24 local TWO_POW_25 = 2 ^ 25 local TWO_POW_26 = 2 ^ 26 local TWO_POW_27 = 2 ^ 27 local TWO_POW_28 = 2 ^ 28 local TWO_POW_29 = 2 ^ 29 local TWO_POW_30 = 2 ^ 30 local TWO_POW_31 = 2 ^ 31 local TWO_POW_32 = 2 ^ 32 local TWO_POW_40 = 2 ^ 40 local TWO56_POW_7 = 256 ^ 7 -- Implementation for Lua 5.1/5.2 (with or without bitwise library available) local function sha256_feed_64(H, str, offs, size) -- offs >= 0, size >= 0, size is multiple of 64 local W, K = common_W, sha2_K_hi local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] for pos = offs, offs + size - 1, 64 do for j = 1, 16 do pos = pos + 4 local a, b, c, d = string.byte(str, pos - 3, pos) W[j] = ((a * 256 + b) * 256 + c) * 256 + d end for j = 17, 64 do local a, b = W[j - 15], W[j - 2] W[j] = bit32_bxor(bit32_rrotate(a, 7), bit32_lrotate(a, 14), bit32_rshift(a, 3)) + bit32_bxor(bit32_lrotate(b, 15), bit32_lrotate(b, 13), bit32_rshift(b, 10)) + W[j - 7] + W[j - 16] end local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8 for j = 1, 64 do local z = bit32_bxor(bit32_rrotate(e, 6), bit32_rrotate(e, 11), bit32_lrotate(e, 7)) + bit32_band(e, f) + bit32_band(-1 - e, g) + h + K[j] + W[j] h = g g = f f = e e = z + d d = c c = b b = a a = z + bit32_band(d, c) + bit32_band(a, bit32_bxor(d, c)) + bit32_bxor(bit32_rrotate(a, 2), bit32_rrotate(a, 13), bit32_lrotate(a, 10)) end h1, h2, h3, h4 = (a + h1) % 4294967296, (b + h2) % 4294967296, (c + h3) % 4294967296, (d + h4) % 4294967296 h5, h6, h7, h8 = (e + h5) % 4294967296, (f + h6) % 4294967296, (g + h7) % 4294967296, (h + h8) % 4294967296 end H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8 end local function sha512_feed_128(H_lo, H_hi, str, offs, size) -- offs >= 0, size >= 0, size is multiple of 128 -- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k] local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] for pos = offs, offs + size - 1, 128 do for j = 1, 16 * 2 do pos = pos + 4 local a, b, c, d = string.byte(str, pos - 3, pos) W[j] = ((a * 256 + b) * 256 + c) * 256 + d end for jj = 34, 160, 2 do local a_lo, a_hi, b_lo, b_hi = W[jj - 30], W[jj - 31], W[jj - 4], W[jj - 5] local tmp1 = bit32_bxor( bit32_rshift(a_lo, 1) + bit32_lshift(a_hi, 31), bit32_rshift(a_lo, 8) + bit32_lshift(a_hi, 24), bit32_rshift(a_lo, 7) + bit32_lshift(a_hi, 25) ) % 4294967296 + bit32_bxor( bit32_rshift(b_lo, 19) + bit32_lshift(b_hi, 13), bit32_lshift(b_lo, 3) + bit32_rshift(b_hi, 29), bit32_rshift(b_lo, 6) + bit32_lshift(b_hi, 26) ) % 4294967296 + W[jj - 14] + W[jj - 32] local tmp2 = tmp1 % 4294967296 W[jj - 1] = bit32_bxor( bit32_rshift(a_hi, 1) + bit32_lshift(a_lo, 31), bit32_rshift(a_hi, 8) + bit32_lshift(a_lo, 24), bit32_rshift(a_hi, 7) ) + bit32_bxor( bit32_rshift(b_hi, 19) + bit32_lshift(b_lo, 13), bit32_lshift(b_hi, 3) + bit32_rshift(b_lo, 29), bit32_rshift(b_hi, 6) ) + W[jj - 15] + W[jj - 33] + (tmp1 - tmp2) / 4294967296 W[jj] = tmp2 end local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi for j = 1, 80 do local jj = 2 * j local tmp1 = bit32_bxor( bit32_rshift(e_lo, 14) + bit32_lshift(e_hi, 18), bit32_rshift(e_lo, 18) + bit32_lshift(e_hi, 14), bit32_lshift(e_lo, 23) + bit32_rshift(e_hi, 9) ) % 4294967296 + (bit32_band(e_lo, f_lo) + bit32_band(-1 - e_lo, g_lo)) % 4294967296 + h_lo + K_lo[j] + W[jj] local z_lo = tmp1 % 4294967296 local z_hi = bit32_bxor( bit32_rshift(e_hi, 14) + bit32_lshift(e_lo, 18), bit32_rshift(e_hi, 18) + bit32_lshift(e_lo, 14), bit32_lshift(e_hi, 23) + bit32_rshift(e_lo, 9) ) + bit32_band(e_hi, f_hi) + bit32_band(-1 - e_hi, g_hi) + h_hi + K_hi[j] + W[jj - 1] + (tmp1 - z_lo) / 4294967296 h_lo = g_lo h_hi = g_hi g_lo = f_lo g_hi = f_hi f_lo = e_lo f_hi = e_hi tmp1 = z_lo + d_lo e_lo = tmp1 % 4294967296 e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296 d_lo = c_lo d_hi = c_hi c_lo = b_lo c_hi = b_hi b_lo = a_lo b_hi = a_hi tmp1 = z_lo + (bit32_band(d_lo, c_lo) + bit32_band(b_lo, bit32_bxor(d_lo, c_lo))) % 4294967296 + bit32_bxor( bit32_rshift(b_lo, 28) + bit32_lshift(b_hi, 4), bit32_lshift(b_lo, 30) + bit32_rshift(b_hi, 2), bit32_lshift(b_lo, 25) + bit32_rshift(b_hi, 7) ) % 4294967296 a_lo = tmp1 % 4294967296 a_hi = z_hi + (bit32_band(d_hi, c_hi) + bit32_band(b_hi, bit32_bxor(d_hi, c_hi))) + bit32_bxor( bit32_rshift(b_hi, 28) + bit32_lshift(b_lo, 4), bit32_lshift(b_hi, 30) + bit32_rshift(b_lo, 2), bit32_lshift(b_hi, 25) + bit32_rshift(b_lo, 7) ) + (tmp1 - a_lo) / 4294967296 end a_lo = h1_lo + a_lo h1_lo = a_lo % 4294967296 h1_hi = (h1_hi + a_hi + (a_lo - h1_lo) / 4294967296) % 4294967296 a_lo = h2_lo + b_lo h2_lo = a_lo % 4294967296 h2_hi = (h2_hi + b_hi + (a_lo - h2_lo) / 4294967296) % 4294967296 a_lo = h3_lo + c_lo h3_lo = a_lo % 4294967296 h3_hi = (h3_hi + c_hi + (a_lo - h3_lo) / 4294967296) % 4294967296 a_lo = h4_lo + d_lo h4_lo = a_lo % 4294967296 h4_hi = (h4_hi + d_hi + (a_lo - h4_lo) / 4294967296) % 4294967296 a_lo = h5_lo + e_lo h5_lo = a_lo % 4294967296 h5_hi = (h5_hi + e_hi + (a_lo - h5_lo) / 4294967296) % 4294967296 a_lo = h6_lo + f_lo h6_lo = a_lo % 4294967296 h6_hi = (h6_hi + f_hi + (a_lo - h6_lo) / 4294967296) % 4294967296 a_lo = h7_lo + g_lo h7_lo = a_lo % 4294967296 h7_hi = (h7_hi + g_hi + (a_lo - h7_lo) / 4294967296) % 4294967296 a_lo = h8_lo + h_lo h8_lo = a_lo % 4294967296 h8_hi = (h8_hi + h_hi + (a_lo - h8_lo) / 4294967296) % 4294967296 end H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi end local function md5_feed_64(H, str, offs, size) -- offs >= 0, size >= 0, size is multiple of 64 local W, K, md5_next_shift = common_W, md5_K, md5_next_shift local h1, h2, h3, h4 = H[1], H[2], H[3], H[4] for pos = offs, offs + size - 1, 64 do for j = 1, 16 do pos = pos + 4 local a, b, c, d = string.byte(str, pos - 3, pos) W[j] = ((d * 256 + c) * 256 + b) * 256 + a end local a, b, c, d = h1, h2, h3, h4 local s = 25 for j = 1, 16 do local F = bit32_rrotate(bit32_band(b, c) + bit32_band(-1 - b, d) + a + K[j] + W[j], s) + b s = md5_next_shift[s] a = d d = c c = b b = F end s = 27 for j = 17, 32 do local F = bit32_rrotate(bit32_band(d, b) + bit32_band(-1 - d, c) + a + K[j] + W[(5 * j - 4) % 16 + 1], s) + b s = md5_next_shift[s] a = d d = c c = b b = F end s = 28 for j = 33, 48 do local F = bit32_rrotate(bit32_bxor(bit32_bxor(b, c), d) + a + K[j] + W[(3 * j + 2) % 16 + 1], s) + b s = md5_next_shift[s] a = d d = c c = b b = F end s = 26 for j = 49, 64 do local F = bit32_rrotate(bit32_bxor(c, bit32_bor(b, -1 - d)) + a + K[j] + W[(j * 7 - 7) % 16 + 1], s) + b s = md5_next_shift[s] a = d d = c c = b b = F end h1 = (a + h1) % 4294967296 h2 = (b + h2) % 4294967296 h3 = (c + h3) % 4294967296 h4 = (d + h4) % 4294967296 end H[1], H[2], H[3], H[4] = h1, h2, h3, h4 end local function sha1_feed_64(H, str, offs, size) -- offs >= 0, size >= 0, size is multiple of 64 local W = common_W local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5] for pos = offs, offs + size - 1, 64 do for j = 1, 16 do pos = pos + 4 local a, b, c, d = string.byte(str, pos - 3, pos) W[j] = ((a * 256 + b) * 256 + c) * 256 + d end for j = 17, 80 do W[j] = bit32_lrotate(bit32_bxor(W[j - 3], W[j - 8], W[j - 14], W[j - 16]), 1) end local a, b, c, d, e = h1, h2, h3, h4, h5 for j = 1, 20 do local z = bit32_lrotate(a, 5) + bit32_band(b, c) + bit32_band(-1 - b, d) + 0x5A827999 + W[j] + e -- constant = math.floor(TWO_POW_30 * sqrt(2)) e = d d = c c = bit32_rrotate(b, 2) b = a a = z end for j = 21, 40 do local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0x6ED9EBA1 + W[j] + e -- TWO_POW_30 * sqrt(3) e = d d = c c = bit32_rrotate(b, 2) b = a a = z end for j = 41, 60 do local z = bit32_lrotate(a, 5) + bit32_band(d, c) + bit32_band(b, bit32_bxor(d, c)) + 0x8F1BBCDC + W[j] + e -- TWO_POW_30 * sqrt(5) e = d d = c c = bit32_rrotate(b, 2) b = a a = z end for j = 61, 80 do local z = bit32_lrotate(a, 5) + bit32_bxor(b, c, d) + 0xCA62C1D6 + W[j] + e -- TWO_POW_30 * sqrt(10) e = d d = c c = bit32_rrotate(b, 2) b = a a = z end h1 = (a + h1) % 4294967296 h2 = (b + h2) % 4294967296 h3 = (c + h3) % 4294967296 h4 = (d + h4) % 4294967296 h5 = (e + h5) % 4294967296 end H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5 end local function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes) -- This is an example of a Lua function having 79 local variables :-) -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8 local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi local qwords_qty = block_size_in_bytes / 8 for pos = offs, offs + size - 1, block_size_in_bytes do for j = 1, qwords_qty do local a, b, c, d = string.byte(str, pos + 1, pos + 4) lanes_lo[j] = bit32_bxor(lanes_lo[j], ((d * 256 + c) * 256 + b) * 256 + a) pos = pos + 8 a, b, c, d = string.byte(str, pos - 3, pos) lanes_hi[j] = bit32_bxor(lanes_hi[j], ((d * 256 + c) * 256 + b) * 256 + a) end local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi, L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi, L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi = lanes_lo[1], lanes_hi[1], lanes_lo[2], lanes_hi[2], lanes_lo[3], lanes_hi[3], lanes_lo[4], lanes_hi[4], lanes_lo[5], lanes_hi[5], lanes_lo[6], lanes_hi[6], lanes_lo[7], lanes_hi[7], lanes_lo[8], lanes_hi[8], lanes_lo[9], lanes_hi[9], lanes_lo[10], lanes_hi[10], lanes_lo[11], lanes_hi[11], lanes_lo[12], lanes_hi[12], lanes_lo[13], lanes_hi[13], lanes_lo[14], lanes_hi[14], lanes_lo[15], lanes_hi[15], lanes_lo[16], lanes_hi[16], lanes_lo[17], lanes_hi[17], lanes_lo[18], lanes_hi[18], lanes_lo[19], lanes_hi[19], lanes_lo[20], lanes_hi[20], lanes_lo[21], lanes_hi[21], lanes_lo[22], lanes_hi[22], lanes_lo[23], lanes_hi[23], lanes_lo[24], lanes_hi[24], lanes_lo[25], lanes_hi[25] for round_idx = 1, 24 do local C1_lo = bit32_bxor(L01_lo, L06_lo, L11_lo, L16_lo, L21_lo) local C1_hi = bit32_bxor(L01_hi, L06_hi, L11_hi, L16_hi, L21_hi) local C2_lo = bit32_bxor(L02_lo, L07_lo, L12_lo, L17_lo, L22_lo) local C2_hi = bit32_bxor(L02_hi, L07_hi, L12_hi, L17_hi, L22_hi) local C3_lo = bit32_bxor(L03_lo, L08_lo, L13_lo, L18_lo, L23_lo) local C3_hi = bit32_bxor(L03_hi, L08_hi, L13_hi, L18_hi, L23_hi) local C4_lo = bit32_bxor(L04_lo, L09_lo, L14_lo, L19_lo, L24_lo) local C4_hi = bit32_bxor(L04_hi, L09_hi, L14_hi, L19_hi, L24_hi) local C5_lo = bit32_bxor(L05_lo, L10_lo, L15_lo, L20_lo, L25_lo) local C5_hi = bit32_bxor(L05_hi, L10_hi, L15_hi, L20_hi, L25_hi) local D_lo = bit32_bxor(C1_lo, C3_lo * 2 + (C3_hi % TWO_POW_32 - C3_hi % TWO_POW_31) / TWO_POW_31) local D_hi = bit32_bxor(C1_hi, C3_hi * 2 + (C3_lo % TWO_POW_32 - C3_lo % TWO_POW_31) / TWO_POW_31) local T0_lo = bit32_bxor(D_lo, L02_lo) local T0_hi = bit32_bxor(D_hi, L02_hi) local T1_lo = bit32_bxor(D_lo, L07_lo) local T1_hi = bit32_bxor(D_hi, L07_hi) local T2_lo = bit32_bxor(D_lo, L12_lo) local T2_hi = bit32_bxor(D_hi, L12_hi) local T3_lo = bit32_bxor(D_lo, L17_lo) local T3_hi = bit32_bxor(D_hi, L17_hi) local T4_lo = bit32_bxor(D_lo, L22_lo) local T4_hi = bit32_bxor(D_hi, L22_hi) L02_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_20) / TWO_POW_20 + T1_hi * TWO_POW_12 L02_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_20) / TWO_POW_20 + T1_lo * TWO_POW_12 L07_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_19) / TWO_POW_19 + T3_hi * TWO_POW_13 L07_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_19) / TWO_POW_19 + T3_lo * TWO_POW_13 L12_lo = T0_lo * 2 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_31) / TWO_POW_31 L12_hi = T0_hi * 2 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_31) / TWO_POW_31 L17_lo = T2_lo * TWO_POW_10 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_22) / TWO_POW_22 L17_hi = T2_hi * TWO_POW_10 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_22) / TWO_POW_22 L22_lo = T4_lo * TWO_POW_2 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_30) / TWO_POW_30 L22_hi = T4_hi * TWO_POW_2 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_30) / TWO_POW_30 D_lo = bit32_bxor(C2_lo, C4_lo * 2 + (C4_hi % TWO_POW_32 - C4_hi % TWO_POW_31) / TWO_POW_31) D_hi = bit32_bxor(C2_hi, C4_hi * 2 + (C4_lo % TWO_POW_32 - C4_lo % TWO_POW_31) / TWO_POW_31) T0_lo = bit32_bxor(D_lo, L03_lo) T0_hi = bit32_bxor(D_hi, L03_hi) T1_lo = bit32_bxor(D_lo, L08_lo) T1_hi = bit32_bxor(D_hi, L08_hi) T2_lo = bit32_bxor(D_lo, L13_lo) T2_hi = bit32_bxor(D_hi, L13_hi) T3_lo = bit32_bxor(D_lo, L18_lo) T3_hi = bit32_bxor(D_hi, L18_hi) T4_lo = bit32_bxor(D_lo, L23_lo) T4_hi = bit32_bxor(D_hi, L23_hi) L03_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_21) / TWO_POW_21 + T2_hi * TWO_POW_11 L03_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_21) / TWO_POW_21 + T2_lo * TWO_POW_11 L08_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_3) / TWO_POW_3 + T4_hi * TWO_POW_29 % TWO_POW_32 L08_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_3) / TWO_POW_3 + T4_lo * TWO_POW_29 % TWO_POW_32 L13_lo = T1_lo * TWO_POW_6 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_26) / TWO_POW_26 L13_hi = T1_hi * TWO_POW_6 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_26) / TWO_POW_26 L18_lo = T3_lo * TWO_POW_15 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_17) / TWO_POW_17 L18_hi = T3_hi * TWO_POW_15 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_17) / TWO_POW_17 L23_lo = (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_2) / TWO_POW_2 + T0_hi * TWO_POW_30 % TWO_POW_32 L23_hi = (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_2) / TWO_POW_2 + T0_lo * TWO_POW_30 % TWO_POW_32 D_lo = bit32_bxor(C3_lo, C5_lo * 2 + (C5_hi % TWO_POW_32 - C5_hi % TWO_POW_31) / TWO_POW_31) D_hi = bit32_bxor(C3_hi, C5_hi * 2 + (C5_lo % TWO_POW_32 - C5_lo % TWO_POW_31) / TWO_POW_31) T0_lo = bit32_bxor(D_lo, L04_lo) T0_hi = bit32_bxor(D_hi, L04_hi) T1_lo = bit32_bxor(D_lo, L09_lo) T1_hi = bit32_bxor(D_hi, L09_hi) T2_lo = bit32_bxor(D_lo, L14_lo) T2_hi = bit32_bxor(D_hi, L14_hi) T3_lo = bit32_bxor(D_lo, L19_lo) T3_hi = bit32_bxor(D_hi, L19_hi) T4_lo = bit32_bxor(D_lo, L24_lo) T4_hi = bit32_bxor(D_hi, L24_hi) L04_lo = T3_lo * TWO_POW_21 % TWO_POW_32 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_11) / TWO_POW_11 L04_hi = T3_hi * TWO_POW_21 % TWO_POW_32 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_11) / TWO_POW_11 L09_lo = T0_lo * TWO_POW_28 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_4) / TWO_POW_4 L09_hi = T0_hi * TWO_POW_28 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_4) / TWO_POW_4 L14_lo = T2_lo * TWO_POW_25 % TWO_POW_32 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_7) / TWO_POW_7 L14_hi = T2_hi * TWO_POW_25 % TWO_POW_32 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_7) / TWO_POW_7 L19_lo = (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_8) / TWO_POW_8 + T4_hi * TWO_POW_24 % TWO_POW_32 L19_hi = (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_8) / TWO_POW_8 + T4_lo * TWO_POW_24 % TWO_POW_32 L24_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_9) / TWO_POW_9 + T1_hi * TWO_POW_23 % TWO_POW_32 L24_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_9) / TWO_POW_9 + T1_lo * TWO_POW_23 % TWO_POW_32 D_lo = bit32_bxor(C4_lo, C1_lo * 2 + (C1_hi % TWO_POW_32 - C1_hi % TWO_POW_31) / TWO_POW_31) D_hi = bit32_bxor(C4_hi, C1_hi * 2 + (C1_lo % TWO_POW_32 - C1_lo % TWO_POW_31) / TWO_POW_31) T0_lo = bit32_bxor(D_lo, L05_lo) T0_hi = bit32_bxor(D_hi, L05_hi) T1_lo = bit32_bxor(D_lo, L10_lo) T1_hi = bit32_bxor(D_hi, L10_hi) T2_lo = bit32_bxor(D_lo, L15_lo) T2_hi = bit32_bxor(D_hi, L15_hi) T3_lo = bit32_bxor(D_lo, L20_lo) T3_hi = bit32_bxor(D_hi, L20_hi) T4_lo = bit32_bxor(D_lo, L25_lo) T4_hi = bit32_bxor(D_hi, L25_hi) L05_lo = T4_lo * TWO_POW_14 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_18) / TWO_POW_18 L05_hi = T4_hi * TWO_POW_14 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_18) / TWO_POW_18 L10_lo = T1_lo * TWO_POW_20 % TWO_POW_32 + (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_12) / TWO_POW_12 L10_hi = T1_hi * TWO_POW_20 % TWO_POW_32 + (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_12) / TWO_POW_12 L15_lo = T3_lo * TWO_POW_8 + (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_24) / TWO_POW_24 L15_hi = T3_hi * TWO_POW_8 + (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_24) / TWO_POW_24 L20_lo = T0_lo * TWO_POW_27 % TWO_POW_32 + (T0_hi % TWO_POW_32 - T0_hi % TWO_POW_5) / TWO_POW_5 L20_hi = T0_hi * TWO_POW_27 % TWO_POW_32 + (T0_lo % TWO_POW_32 - T0_lo % TWO_POW_5) / TWO_POW_5 L25_lo = (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_25) / TWO_POW_25 + T2_hi * TWO_POW_7 L25_hi = (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_25) / TWO_POW_25 + T2_lo * TWO_POW_7 D_lo = bit32_bxor(C5_lo, C2_lo * 2 + (C2_hi % TWO_POW_32 - C2_hi % TWO_POW_31) / TWO_POW_31) D_hi = bit32_bxor(C5_hi, C2_hi * 2 + (C2_lo % TWO_POW_32 - C2_lo % TWO_POW_31) / TWO_POW_31) T1_lo = bit32_bxor(D_lo, L06_lo) T1_hi = bit32_bxor(D_hi, L06_hi) T2_lo = bit32_bxor(D_lo, L11_lo) T2_hi = bit32_bxor(D_hi, L11_hi) T3_lo = bit32_bxor(D_lo, L16_lo) T3_hi = bit32_bxor(D_hi, L16_hi) T4_lo = bit32_bxor(D_lo, L21_lo) T4_hi = bit32_bxor(D_hi, L21_hi) L06_lo = T2_lo * TWO_POW_3 + (T2_hi % TWO_POW_32 - T2_hi % TWO_POW_29) / TWO_POW_29 L06_hi = T2_hi * TWO_POW_3 + (T2_lo % TWO_POW_32 - T2_lo % TWO_POW_29) / TWO_POW_29 L11_lo = T4_lo * TWO_POW_18 + (T4_hi % TWO_POW_32 - T4_hi % TWO_POW_14) / TWO_POW_14 L11_hi = T4_hi * TWO_POW_18 + (T4_lo % TWO_POW_32 - T4_lo % TWO_POW_14) / TWO_POW_14 L16_lo = (T1_lo % TWO_POW_32 - T1_lo % TWO_POW_28) / TWO_POW_28 + T1_hi * TWO_POW_4 L16_hi = (T1_hi % TWO_POW_32 - T1_hi % TWO_POW_28) / TWO_POW_28 + T1_lo * TWO_POW_4 L21_lo = (T3_lo % TWO_POW_32 - T3_lo % TWO_POW_23) / TWO_POW_23 + T3_hi * TWO_POW_9 L21_hi = (T3_hi % TWO_POW_32 - T3_hi % TWO_POW_23) / TWO_POW_23 + T3_lo * TWO_POW_9 L01_lo = bit32_bxor(D_lo, L01_lo) L01_hi = bit32_bxor(D_hi, L01_hi) L01_lo, L02_lo, L03_lo, L04_lo, L05_lo = bit32_bxor(L01_lo, bit32_band(-1 - L02_lo, L03_lo)), bit32_bxor(L02_lo, bit32_band(-1 - L03_lo, L04_lo)), bit32_bxor(L03_lo, bit32_band(-1 - L04_lo, L05_lo)), bit32_bxor(L04_lo, bit32_band(-1 - L05_lo, L01_lo)), bit32_bxor(L05_lo, bit32_band(-1 - L01_lo, L02_lo)) L01_hi, L02_hi, L03_hi, L04_hi, L05_hi = bit32_bxor(L01_hi, bit32_band(-1 - L02_hi, L03_hi)), bit32_bxor(L02_hi, bit32_band(-1 - L03_hi, L04_hi)), bit32_bxor(L03_hi, bit32_band(-1 - L04_hi, L05_hi)), bit32_bxor(L04_hi, bit32_band(-1 - L05_hi, L01_hi)), bit32_bxor(L05_hi, bit32_band(-1 - L01_hi, L02_hi)) L06_lo, L07_lo, L08_lo, L09_lo, L10_lo = bit32_bxor(L09_lo, bit32_band(-1 - L10_lo, L06_lo)), bit32_bxor(L10_lo, bit32_band(-1 - L06_lo, L07_lo)), bit32_bxor(L06_lo, bit32_band(-1 - L07_lo, L08_lo)), bit32_bxor(L07_lo, bit32_band(-1 - L08_lo, L09_lo)), bit32_bxor(L08_lo, bit32_band(-1 - L09_lo, L10_lo)) L06_hi, L07_hi, L08_hi, L09_hi, L10_hi = bit32_bxor(L09_hi, bit32_band(-1 - L10_hi, L06_hi)), bit32_bxor(L10_hi, bit32_band(-1 - L06_hi, L07_hi)), bit32_bxor(L06_hi, bit32_band(-1 - L07_hi, L08_hi)), bit32_bxor(L07_hi, bit32_band(-1 - L08_hi, L09_hi)), bit32_bxor(L08_hi, bit32_band(-1 - L09_hi, L10_hi)) L11_lo, L12_lo, L13_lo, L14_lo, L15_lo = bit32_bxor(L12_lo, bit32_band(-1 - L13_lo, L14_lo)), bit32_bxor(L13_lo, bit32_band(-1 - L14_lo, L15_lo)), bit32_bxor(L14_lo, bit32_band(-1 - L15_lo, L11_lo)), bit32_bxor(L15_lo, bit32_band(-1 - L11_lo, L12_lo)), bit32_bxor(L11_lo, bit32_band(-1 - L12_lo, L13_lo)) L11_hi, L12_hi, L13_hi, L14_hi, L15_hi = bit32_bxor(L12_hi, bit32_band(-1 - L13_hi, L14_hi)), bit32_bxor(L13_hi, bit32_band(-1 - L14_hi, L15_hi)), bit32_bxor(L14_hi, bit32_band(-1 - L15_hi, L11_hi)), bit32_bxor(L15_hi, bit32_band(-1 - L11_hi, L12_hi)), bit32_bxor(L11_hi, bit32_band(-1 - L12_hi, L13_hi)) L16_lo, L17_lo, L18_lo, L19_lo, L20_lo = bit32_bxor(L20_lo, bit32_band(-1 - L16_lo, L17_lo)), bit32_bxor(L16_lo, bit32_band(-1 - L17_lo, L18_lo)), bit32_bxor(L17_lo, bit32_band(-1 - L18_lo, L19_lo)), bit32_bxor(L18_lo, bit32_band(-1 - L19_lo, L20_lo)), bit32_bxor(L19_lo, bit32_band(-1 - L20_lo, L16_lo)) L16_hi, L17_hi, L18_hi, L19_hi, L20_hi = bit32_bxor(L20_hi, bit32_band(-1 - L16_hi, L17_hi)), bit32_bxor(L16_hi, bit32_band(-1 - L17_hi, L18_hi)), bit32_bxor(L17_hi, bit32_band(-1 - L18_hi, L19_hi)), bit32_bxor(L18_hi, bit32_band(-1 - L19_hi, L20_hi)), bit32_bxor(L19_hi, bit32_band(-1 - L20_hi, L16_hi)) L21_lo, L22_lo, L23_lo, L24_lo, L25_lo = bit32_bxor(L23_lo, bit32_band(-1 - L24_lo, L25_lo)), bit32_bxor(L24_lo, bit32_band(-1 - L25_lo, L21_lo)), bit32_bxor(L25_lo, bit32_band(-1 - L21_lo, L22_lo)), bit32_bxor(L21_lo, bit32_band(-1 - L22_lo, L23_lo)), bit32_bxor(L22_lo, bit32_band(-1 - L23_lo, L24_lo)) L21_hi, L22_hi, L23_hi, L24_hi, L25_hi = bit32_bxor(L23_hi, bit32_band(-1 - L24_hi, L25_hi)), bit32_bxor(L24_hi, bit32_band(-1 - L25_hi, L21_hi)), bit32_bxor(L25_hi, bit32_band(-1 - L21_hi, L22_hi)), bit32_bxor(L21_hi, bit32_band(-1 - L22_hi, L23_hi)), bit32_bxor(L22_hi, bit32_band(-1 - L23_hi, L24_hi)) L01_lo = bit32_bxor(L01_lo, RC_lo[round_idx]) L01_hi = L01_hi + RC_hi[round_idx] -- RC_hi[] is either 0 or 0x80000000, so we could use fast addition instead of slow XOR end lanes_lo[1] = L01_lo lanes_hi[1] = L01_hi lanes_lo[2] = L02_lo lanes_hi[2] = L02_hi lanes_lo[3] = L03_lo lanes_hi[3] = L03_hi lanes_lo[4] = L04_lo lanes_hi[4] = L04_hi lanes_lo[5] = L05_lo lanes_hi[5] = L05_hi lanes_lo[6] = L06_lo lanes_hi[6] = L06_hi lanes_lo[7] = L07_lo lanes_hi[7] = L07_hi lanes_lo[8] = L08_lo lanes_hi[8] = L08_hi lanes_lo[9] = L09_lo lanes_hi[9] = L09_hi lanes_lo[10] = L10_lo lanes_hi[10] = L10_hi lanes_lo[11] = L11_lo lanes_hi[11] = L11_hi lanes_lo[12] = L12_lo lanes_hi[12] = L12_hi lanes_lo[13] = L13_lo lanes_hi[13] = L13_hi lanes_lo[14] = L14_lo lanes_hi[14] = L14_hi lanes_lo[15] = L15_lo lanes_hi[15] = L15_hi lanes_lo[16] = L16_lo lanes_hi[16] = L16_hi lanes_lo[17] = L17_lo lanes_hi[17] = L17_hi lanes_lo[18] = L18_lo lanes_hi[18] = L18_hi lanes_lo[19] = L19_lo lanes_hi[19] = L19_hi lanes_lo[20] = L20_lo lanes_hi[20] = L20_hi lanes_lo[21] = L21_lo lanes_hi[21] = L21_hi lanes_lo[22] = L22_lo lanes_hi[22] = L22_hi lanes_lo[23] = L23_lo lanes_hi[23] = L23_hi lanes_lo[24] = L24_lo lanes_hi[24] = L24_hi lanes_lo[25] = L25_lo lanes_hi[25] = L25_hi end end -------------------------------------------------------------------------------- -- MAGIC NUMBERS CALCULATOR -------------------------------------------------------------------------------- -- Q: -- Is 53-bit "double" math enough to calculate square roots and cube roots of primes with 64 correct bits after decimal point? -- A: -- Yes, 53-bit "double" arithmetic is enough. -- We could obtain first 40 bits by direct calculation of p^(1/3) and next 40 bits by one step of Newton's method. do local function mul(src1, src2, factor, result_length) -- src1, src2 - long integers (arrays of digits in base TWO_POW_24) -- factor - small integer -- returns long integer result (src1 * src2 * factor) and its floating point approximation local result, carry, value, weight = table.create(result_length), 0, 0, 1 for j = 1, result_length do for k = math.max(1, j + 1 - #src2), math.min(j, #src1) do carry = carry + factor * src1[k] * src2[j + 1 - k] -- "int32" is not enough for multiplication result, that's why "factor" must be of type "double" end local digit = carry % TWO_POW_24 result[j] = math.floor(digit) carry = (carry - digit) / TWO_POW_24 value = value + digit * weight weight = weight * TWO_POW_24 end return result, value end local idx, step, p, one, sqrt_hi, sqrt_lo = 0, { 4, 1, 2, -2, 2 }, 4, { 1 }, sha2_H_hi, sha2_H_lo repeat p = p + step[p % 6] local d = 1 repeat d = d + step[d % 6] if d * d > p then -- next prime number is found local root = p ^ (1 / 3) local R = root * TWO_POW_40 R = mul(table.create(1, math.floor(R)), one, 1, 2) local _, delta = mul(R, mul(R, R, 1, 4), -1, 4) local hi = R[2] % 65536 * 65536 + math.floor(R[1] / 256) local lo = R[1] % 256 * 16777216 + math.floor(delta * (TWO_POW_NEG_56 / 3) * root / p) if idx < 16 then root = math.sqrt(p) R = root * TWO_POW_40 R = mul(table.create(1, math.floor(R)), one, 1, 2) _, delta = mul(R, R, -1, 2) local hi = R[2] % 65536 * 65536 + math.floor(R[1] / 256) local lo = R[1] % 256 * 16777216 + math.floor(delta * TWO_POW_NEG_17 / root) local idx = idx % 8 + 1 sha2_H_ext256[224][idx] = lo sqrt_hi[idx], sqrt_lo[idx] = hi, lo + hi * hi_factor if idx > 7 then sqrt_hi, sqrt_lo = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384] end end idx = idx + 1 sha2_K_hi[idx], sha2_K_lo[idx] = hi, lo % K_lo_modulo + hi * hi_factor break end until p % d == 0 until idx > 79 end -- Calculating IVs for SHA512/224 and SHA512/256 for width = 224, 256, 32 do local H_lo, H_hi = {}, nil if XOR64A5 then for j = 1, 8 do H_lo[j] = XOR64A5(sha2_H_lo[j]) end else H_hi = {} for j = 1, 8 do H_lo[j] = bit32_bxor(sha2_H_lo[j], 0xA5A5A5A5) % 4294967296 H_hi[j] = bit32_bxor(sha2_H_hi[j], 0xA5A5A5A5) % 4294967296 end end sha512_feed_128(H_lo, H_hi, "SHA-512/" .. tostring(width) .. "\128" .. string.rep("\0", 115) .. "\88", 0, 128) sha2_H_ext512_lo[width] = H_lo sha2_H_ext512_hi[width] = H_hi end -- Constants for MD5 do for idx = 1, 64 do -- we can't use formula math.floor(abs(sin(idx))*TWO_POW_32) because its result may be beyond integer range on Lua built with 32-bit integers local hi, lo = math.modf(math.abs(math.sin(idx)) * TWO_POW_16) md5_K[idx] = hi * 65536 + math.floor(lo * TWO_POW_16) end end -- Constants for SHA3 do local sh_reg = 29 local function next_bit() local r = sh_reg % 2 sh_reg = bit32_bxor((sh_reg - r) / 2, 142 * r) return r end for idx = 1, 24 do local lo, m = 0, nil for _ = 1, 6 do m = m and m * m * 2 or 1 lo = lo + next_bit() * m end local hi = next_bit() * m sha3_RC_hi[idx], sha3_RC_lo[idx] = hi, lo + hi * hi_factor_keccak end end -------------------------------------------------------------------------------- -- MAIN FUNCTIONS -------------------------------------------------------------------------------- local function sha256ext(width, message) -- Create an instance (private objects for current calculation) local Array256 = sha2_H_ext256[width] -- # == 8 local length, tail = 0, "" local H = table.create(8) H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = Array256[1], Array256[2], Array256[3], Array256[4], Array256[5], Array256[6], Array256[7], Array256[8] local function partial(message_part) if message_part then local partLength = #message_part if tail then length = length + partLength local offs = 0 local tailLength = #tail if tail ~= "" and tailLength + partLength >= 64 then offs = 64 - tailLength sha256_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64) tail = "" end local size = partLength - offs local size_tail = size % 64 sha256_feed_64(H, message_part, offs, size - size_tail) tail = tail .. string.sub(message_part, partLength + 1 - size_tail) return partial else error("Adding more chunks is not allowed after receiving the result", 2) end else if tail then local final_blocks = table.create(10) --{tail, "\128", string.rep("\0", (-9 - length) % 64 + 1)} final_blocks[1] = tail final_blocks[2] = "\128" final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1) tail = nil -- Assuming user data length is shorter than (TWO_POW_53)-9 bytes -- Anyway, it looks very unrealistic that someone would spend more than a year of calculations to process TWO_POW_53 bytes of data by using this Lua script :-) -- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes length = length * (8 / TWO56_POW_7) -- convert "byte-counter" to "bit-counter" and move decimal point to the left for j = 4, 10 do length = length % 1 * 256 final_blocks[j] = string.char(math.floor(length)) end final_blocks = table.concat(final_blocks) sha256_feed_64(H, final_blocks, 0, #final_blocks) local max_reg = width / 32 for j = 1, max_reg do H[j] = string.format("%08x", H[j] % 4294967296) end H = table.concat(H, "", 1, max_reg) end return H end end if message then -- Actually perform calculations and return the SHA256 digest of a message return partial(message)() else -- Return function for chunk-by-chunk loading -- User should feed every chunk of input data as single argument to this function and finally get SHA256 digest by invoking this function without an argument return partial end end local function sha512ext(width, message) -- Create an instance (private objects for current calculation) local length, tail, H_lo, H_hi = 0, "", table.pack(table.unpack(sha2_H_ext512_lo[width])), not HEX64 and table.pack(table.unpack(sha2_H_ext512_hi[width])) local function partial(message_part) if message_part then local partLength = #message_part if tail then length = length + partLength local offs = 0 if tail ~= "" and #tail + partLength >= 128 then offs = 128 - #tail sha512_feed_128(H_lo, H_hi, tail .. string.sub(message_part, 1, offs), 0, 128) tail = "" end local size = partLength - offs local size_tail = size % 128 sha512_feed_128(H_lo, H_hi, message_part, offs, size - size_tail) tail = tail .. string.sub(message_part, partLength + 1 - size_tail) return partial else error("Adding more chunks is not allowed after receiving the result", 2) end else if tail then local final_blocks = table.create(3) --{tail, "\128", string.rep("\0", (-17-length) % 128 + 9)} final_blocks[1] = tail final_blocks[2] = "\128" final_blocks[3] = string.rep("\0", (-17 - length) % 128 + 9) tail = nil -- Assuming user data length is shorter than (TWO_POW_53)-17 bytes -- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes length = length * (8 / TWO56_POW_7) -- convert "byte-counter" to "bit-counter" and move floating point to the left for j = 4, 10 do length = length % 1 * 256 final_blocks[j] = string.char(math.floor(length)) end final_blocks = table.concat(final_blocks) sha512_feed_128(H_lo, H_hi, final_blocks, 0, #final_blocks) local max_reg = math.ceil(width / 64) if HEX64 then for j = 1, max_reg do H_lo[j] = HEX64(H_lo[j]) end else for j = 1, max_reg do H_lo[j] = string.format("%08x", H_hi[j] % 4294967296) .. string.format("%08x", H_lo[j] % 4294967296) end H_hi = nil end H_lo = string.sub(table.concat(H_lo, "", 1, max_reg), 1, width / 4) end return H_lo end end if message then -- Actually perform calculations and return the SHA512 digest of a message return partial(message)() else -- Return function for chunk-by-chunk loading -- User should feed every chunk of input data as single argument to this function and finally get SHA512 digest by invoking this function without an argument return partial end end local function md5(message) -- Create an instance (private objects for current calculation) local H, length, tail = table.create(4), 0, "" H[1], H[2], H[3], H[4] = md5_sha1_H[1], md5_sha1_H[2], md5_sha1_H[3], md5_sha1_H[4] local function partial(message_part) if message_part then local partLength = #message_part if tail then length = length + partLength local offs = 0 if tail ~= "" and #tail + partLength >= 64 then offs = 64 - #tail md5_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64) tail = "" end local size = partLength - offs local size_tail = size % 64 md5_feed_64(H, message_part, offs, size - size_tail) tail = tail .. string.sub(message_part, partLength + 1 - size_tail) return partial else error("Adding more chunks is not allowed after receiving the result", 2) end else if tail then local final_blocks = table.create(3) --{tail, "\128", string.rep("\0", (-9 - length) % 64)} final_blocks[1] = tail final_blocks[2] = "\128" final_blocks[3] = string.rep("\0", (-9 - length) % 64) tail = nil length = length * 8 -- convert "byte-counter" to "bit-counter" for j = 4, 11 do local low_byte = length % 256 final_blocks[j] = string.char(low_byte) length = (length - low_byte) / 256 end final_blocks = table.concat(final_blocks) md5_feed_64(H, final_blocks, 0, #final_blocks) for j = 1, 4 do H[j] = string.format("%08x", H[j] % 4294967296) end H = string.gsub(table.concat(H), "(..)(..)(..)(..)", "%4%3%2%1") end return H end end if message then -- Actually perform calculations and return the MD5 digest of a message return partial(message)() else -- Return function for chunk-by-chunk loading -- User should feed every chunk of input data as single argument to this function and finally get MD5 digest by invoking this function without an argument return partial end end local function sha1(message) -- Create an instance (private objects for current calculation) local H, length, tail = table.pack(table.unpack(md5_sha1_H)), 0, "" local function partial(message_part) if message_part then local partLength = #message_part if tail then length = length + partLength local offs = 0 if tail ~= "" and #tail + partLength >= 64 then offs = 64 - #tail sha1_feed_64(H, tail .. string.sub(message_part, 1, offs), 0, 64) tail = "" end local size = partLength - offs local size_tail = size % 64 sha1_feed_64(H, message_part, offs, size - size_tail) tail = tail .. string.sub(message_part, partLength + 1 - size_tail) return partial else error("Adding more chunks is not allowed after receiving the result", 2) end else if tail then local final_blocks = table.create(10) --{tail, "\128", string.rep("\0", (-9 - length) % 64 + 1)} final_blocks[1] = tail final_blocks[2] = "\128" final_blocks[3] = string.rep("\0", (-9 - length) % 64 + 1) tail = nil -- Assuming user data length is shorter than (TWO_POW_53)-9 bytes -- TWO_POW_53 bytes = TWO_POW_56 bits, so "bit-counter" fits in 7 bytes length = length * (8 / TWO56_POW_7) -- convert "byte-counter" to "bit-counter" and move decimal point to the left for j = 4, 10 do length = length % 1 * 256 final_blocks[j] = string.char(math.floor(length)) end final_blocks = table.concat(final_blocks) sha1_feed_64(H, final_blocks, 0, #final_blocks) for j = 1, 5 do H[j] = string.format("%08x", H[j] % 4294967296) end H = table.concat(H) end return H end end if message then -- Actually perform calculations and return the SHA-1 digest of a message return partial(message)() else -- Return function for chunk-by-chunk loading -- User should feed every chunk of input data as single argument to this function and finally get SHA-1 digest by invoking this function without an argument return partial end end local function keccak(block_size_in_bytes, digest_size_in_bytes, is_SHAKE, message) -- "block_size_in_bytes" is multiple of 8 if type(digest_size_in_bytes) ~= "number" then -- arguments in SHAKE are swapped: -- NIST FIPS 202 defines SHAKE(message,num_bits) -- this module defines SHAKE(num_bytes,message) -- it's easy to forget about this swap, hence the check error("Argument 'digest_size_in_bytes' must be a number", 2) end -- Create an instance (private objects for current calculation) local tail, lanes_lo, lanes_hi = "", table.create(25, 0), hi_factor_keccak == 0 and table.create(25, 0) local result --~ pad the input N using the pad function, yielding a padded bit string P with a length divisible by r (such that n = len(P)/r is integer), --~ break P into n consecutive r-bit pieces P0, ..., Pn-1 (last is zero-padded) --~ initialize the state S to a string of b 0 bits. --~ absorb the input into the state: For each block Pi, --~ extend Pi at the end by a string of c 0 bits, yielding one of length b, --~ XOR that with S and --~ apply the block permutation f to the result, yielding a new state S --~ initialize Z to be the empty string --~ while the length of Z is less than d: --~ append the first r bits of S to Z --~ if Z is still less than d bits long, apply f to S, yielding a new state S. --~ truncate Z to d bits local function partial(message_part) if message_part then local partLength = #message_part if tail then local offs = 0 if tail ~= "" and #tail + partLength >= block_size_in_bytes then offs = block_size_in_bytes - #tail keccak_feed( lanes_lo, lanes_hi, tail .. string.sub(message_part, 1, offs), 0, block_size_in_bytes, block_size_in_bytes ) tail = "" end local size = partLength - offs local size_tail = size % block_size_in_bytes keccak_feed(lanes_lo, lanes_hi, message_part, offs, size - size_tail, block_size_in_bytes) tail = tail .. string.sub(message_part, partLength + 1 - size_tail) return partial else error("Adding more chunks is not allowed after receiving the result", 2) end else if tail then -- append the following bits to the message: for usual SHA3: 011(0*)1, for SHAKE: 11111(0*)1 local gap_start = is_SHAKE and 31 or 6 tail = tail .. ( #tail + 1 == block_size_in_bytes and string.char(gap_start + 128) or string.char(gap_start) .. string.rep("\0", (-2 - #tail) % block_size_in_bytes) .. "\128" ) keccak_feed(lanes_lo, lanes_hi, tail, 0, #tail, block_size_in_bytes) tail = nil local lanes_used = 0 local total_lanes = math.floor(block_size_in_bytes / 8) local qwords = {} local function get_next_qwords_of_digest(qwords_qty) -- returns not more than 'qwords_qty' qwords ('qwords_qty' might be non-integer) -- doesn't go across keccak-buffer boundary -- block_size_in_bytes is a multiple of 8, so, keccak-buffer contains integer number of qwords if lanes_used >= total_lanes then keccak_feed(lanes_lo, lanes_hi, "\0\0\0\0\0\0\0\0", 0, 8, 8) lanes_used = 0 end qwords_qty = math.floor(math.min(qwords_qty, total_lanes - lanes_used)) if hi_factor_keccak ~= 0 then for j = 1, qwords_qty do qwords[j] = HEX64(lanes_lo[lanes_used + j - 1 + lanes_index_base]) end else for j = 1, qwords_qty do qwords[j] = string.format("%08x", lanes_hi[lanes_used + j] % 4294967296) .. string.format("%08x", lanes_lo[lanes_used + j] % 4294967296) end end lanes_used = lanes_used + qwords_qty return string.gsub( table.concat(qwords, "", 1, qwords_qty), "(..)(..)(..)(..)(..)(..)(..)(..)", "%8%7%6%5%4%3%2%1" ), qwords_qty * 8 end local parts = {} -- digest parts local last_part, last_part_size = "", 0 local function get_next_part_of_digest(bytes_needed) -- returns 'bytes_needed' bytes, for arbitrary integer 'bytes_needed' bytes_needed = bytes_needed or 1 if bytes_needed <= last_part_size then last_part_size = last_part_size - bytes_needed local part_size_in_nibbles = bytes_needed * 2 local result = string.sub(last_part, 1, part_size_in_nibbles) last_part = string.sub(last_part, part_size_in_nibbles + 1) return result end local parts_qty = 0 if last_part_size > 0 then parts_qty = 1 parts[parts_qty] = last_part bytes_needed = bytes_needed - last_part_size end -- repeats until the length is enough while bytes_needed >= 8 do local next_part, next_part_size = get_next_qwords_of_digest(bytes_needed / 8) parts_qty = parts_qty + 1 parts[parts_qty] = next_part bytes_needed = bytes_needed - next_part_size end if bytes_needed > 0 then last_part, last_part_size = get_next_qwords_of_digest(1) parts_qty = parts_qty + 1 parts[parts_qty] = get_next_part_of_digest(bytes_needed) else last_part, last_part_size = "", 0 end return table.concat(parts, "", 1, parts_qty) end if digest_size_in_bytes < 0 then result = get_next_part_of_digest else result = get_next_part_of_digest(digest_size_in_bytes) end end return result end end if message then -- Actually perform calculations and return the SHA3 digest of a message return partial(message)() else -- Return function for chunk-by-chunk loading -- User should feed every chunk of input data as single argument to this function and finally get SHA3 digest by invoking this function without an argument return partial end end local function HexToBinFunction(hh) return string.char(tonumber(hh, 16)) end local function hex2bin(hex_string) return (string.gsub(hex_string, "%x%x", HexToBinFunction)) end local base64_symbols = { ["+"] = 62, ["-"] = 62, [62] = "+", ["/"] = 63, ["_"] = 63, [63] = "/", ["="] = -1, ["."] = -1, [-1] = "=", } local symbol_index = 0 for j, pair in ipairs({ "AZ", "az", "09" }) do for ascii = string.byte(pair), string.byte(pair, 2) do local ch = string.char(ascii) base64_symbols[ch] = symbol_index base64_symbols[symbol_index] = ch symbol_index = symbol_index + 1 end end local function bin2base64(binary_string) local stringLength = #binary_string local result = table.create(math.ceil(stringLength / 3)) local length = 0 for pos = 1, #binary_string, 3 do local c1, c2, c3, c4 = string.byte(string.sub(binary_string, pos, pos + 2) .. "\0", 1, -1) length = length + 1 result[length] = base64_symbols[math.floor(c1 / 4)] .. base64_symbols[c1 % 4 * 16 + math.floor(c2 / 16)] .. base64_symbols[c3 and c2 % 16 * 4 + math.floor(c3 / 64) or -1] .. base64_symbols[c4 and c3 % 64 or -1] end return table.concat(result) end local function base642bin(base64_string) local result, chars_qty = {}, 3 for pos, ch in string.gmatch(string.gsub(base64_string, "%s+", ""), "()(.)") do local code = base64_symbols[ch] if code < 0 then chars_qty = chars_qty - 1 code = 0 end local idx = pos % 4 if idx > 0 then result[-idx] = code else local c1 = result[-1] * 4 + math.floor(result[-2] / 16) local c2 = (result[-2] % 16) * 16 + math.floor(result[-3] / 4) local c3 = (result[-3] % 4) * 64 + code result[#result + 1] = string.sub(string.char(c1, c2, c3), 1, chars_qty) end end return table.concat(result) end local block_size_for_HMAC -- this table will be initialized at the end of the module --local function pad_and_xor(str, result_length, byte_for_xor) -- return string.gsub(str, ".", function(c) -- return string.char(bit32_bxor(string.byte(c), byte_for_xor)) -- end) .. string.rep(string.char(byte_for_xor), result_length - #str) --end -- For the sake of speed of converting hexes to strings, there's a map of the conversions here local BinaryStringMap = {} for Index = 0, 255 do BinaryStringMap[string.format("%02x", Index)] = string.char(Index) end -- Update 02.14.20 - added AsBinary for easy GameAnalytics replacement. local function hmac(hash_func, key, message, AsBinary) -- Create an instance (private objects for current calculation) local block_size = block_size_for_HMAC[hash_func] if not block_size then error("Unknown hash function", 2) end local KeyLength = #key if KeyLength > block_size then key = string.gsub(hash_func(key), "%x%x", HexToBinFunction) KeyLength = #key end local append = hash_func()(string.gsub(key, ".", function(c) return string.char(bit32_bxor(string.byte(c), 0x36)) end) .. string.rep("6", block_size - KeyLength)) -- 6 = string.char(0x36) local result local function partial(message_part) if not message_part then result = result or hash_func( string.gsub(key, ".", function(c) return string.char(bit32_bxor(string.byte(c), 0x5c)) end) .. string.rep("\\", block_size - KeyLength) -- \ = string.char(0x5c) .. (string.gsub(append(), "%x%x", HexToBinFunction)) ) return result elseif result then error("Adding more chunks is not allowed after receiving the result", 2) else append(message_part) return partial end end if message then -- Actually perform calculations and return the HMAC of a message local FinalMessage = partial(message)() return AsBinary and (string.gsub(FinalMessage, "%x%x", BinaryStringMap)) or FinalMessage else -- Return function for chunk-by-chunk loading of a message -- User should feed every chunk of the message as single argument to this function and finally get HMAC by invoking this function without an argument return partial end end local sha = { md5 = md5, sha1 = sha1, -- SHA2 hash functions: sha224 = function(message) return sha256ext(224, message) end, sha256 = function(message) return sha256ext(256, message) end, sha512_224 = function(message) return sha512ext(224, message) end, sha512_256 = function(message) return sha512ext(256, message) end, sha384 = function(message) return sha512ext(384, message) end, sha512 = function(message) return sha512ext(512, message) end, -- SHA3 hash functions: sha3_224 = function(message) return keccak((1600 - 2 * 224) / 8, 224 / 8, false, message) end, sha3_256 = function(message) return keccak((1600 - 2 * 256) / 8, 256 / 8, false, message) end, sha3_384 = function(message) return keccak((1600 - 2 * 384) / 8, 384 / 8, false, message) end, sha3_512 = function(message) return keccak((1600 - 2 * 512) / 8, 512 / 8, false, message) end, shake128 = function(message, digest_size_in_bytes) return keccak((1600 - 2 * 128) / 8, digest_size_in_bytes, true, message) end, shake256 = function(message, digest_size_in_bytes) return keccak((1600 - 2 * 256) / 8, digest_size_in_bytes, true, message) end, -- misc utilities: hmac = hmac, -- HMAC(hash_func, key, message) is applicable to any hash function from this module except SHAKE* hex_to_bin = hex2bin, -- converts hexadecimal representation to binary string base64_to_bin = base642bin, -- converts base64 representation to binary string bin_to_base64 = bin2base64, -- converts binary string to base64 representation base64_encode = Base64.Encode, base64_decode = Base64.Decode, } block_size_for_HMAC = { [sha.md5] = 64, [sha.sha1] = 64, [sha.sha224] = 64, [sha.sha256] = 64, [sha.sha512_224] = 128, [sha.sha512_256] = 128, [sha.sha384] = 128, [sha.sha512] = 128, [sha.sha3_224] = (1600 - 2 * 224) / 8, [sha.sha3_256] = (1600 - 2 * 256) / 8, [sha.sha3_384] = (1600 - 2 * 384) / 8, [sha.sha3_512] = (1600 - 2 * 512) / 8, } return sha