有人能帮我把下面的 C# 代码(Knuth 哈希)转换成等效的 SQL 实现吗?
/// <summary>
/// Computes a 64-bit multiplicative ("Knuth") hash of a string and returns it as a
/// decimal string. Adapted from https://stackoverflow.com/a/9545731
/// </summary>
/// <param name="read">Text to hash. Every UTF-16 code unit is folded in, in order.</param>
/// <returns>The unsigned 64-bit hash value formatted with the invariant culture.</returns>
/// <remarks>This hash is not cryptographically secure.</remarks>
static string CalculateHash(string read)
{
    // The algorithm is defined modulo 2^64, i.e. it relies on UInt64 wrap-around.
    // The explicit unchecked block keeps that behaviour even when the project is
    // compiled with overflow checking enabled, where it would otherwise throw.
    unchecked
    {
        UInt64 hashedValue = 3074457345618258791ul;
        foreach (char codeUnit in read)
        {
            hashedValue += codeUnit;
            hashedValue *= 3074457345618258799ul;
        }
        return hashedValue.ToString(CultureInfo.InvariantCulture);
    }
}
我正在使用 Microsoft SQL Server 2016。
此外,正如这里所指出的,在 SQL 中可以使用 NUMERIC(20) 数据类型来表示 64 位无符号整数值。
谢谢你。
我准备了两个可选的 SQL 函数:一个使用 NUMERIC(38,0) 执行计算,另一个使用 BIGINT 执行计算。
BIGINT 版本受 Int64 取值范围的限制,需要把数值按位切片后分段完成乘法运算。尽管实现更加复杂,它仍可能比 NUMERIC(38,0) 版本更快,因为处理器原生支持 BIGINT 的加法、乘法以及各种按位运算;而 NUMERIC(38,0) 是一种由软件模拟的数据类型,并且需要使用除法和求余运算。我没有进行性能测试。
这两个函数产生相同的结果,并且还与多个测试字符串的参考 C# 实现的结果相匹配,包括包含 16 位字符的字符串和 U+FFFF 以上 UNICODE 字符的代理对。
NUMERIC(38,0)
版本:
CREATE FUNCTION KnuthHashUnicode1(@String NVARCHAR(MAX))
RETURNS VARCHAR(20)
AS
BEGIN
    -- Purpose: 64-bit multiplicative ("Knuth") hash of @String, computed with
    --          NUMERIC(38,0) arithmetic reduced modulo 2^64.
    --          Adaptation of the C# algorithm posted at https://stackoverflow.com/a/9545731
    -- Param:   @String - text to hash; every UTF-16 code unit is folded in, in order.
    -- Returns: the unsigned 64-bit hash as a decimal string (at most 20 digits).
    --          For NULL input the loop is skipped and the seed value is returned.
    -- WARNING: This hash is **not** cryptographically secure.
    -- NOTE: This algorithm depends on the fact that the @Multiplier value is such
    --       that @Multiplier * @TwoPower64 does not exceed the NUMERIC(38,0) capacity.

    -- Key values
    DECLARE @Seed       NUMERIC(38,0) = 3074457345618258791;
    DECLARE @Multiplier NUMERIC(38,0) = 3074457345618258799;
    DECLARE @TwoPower64 NUMERIC(38,0) = 18446744073709551616; -- 2^64

    -- Running hash, always kept in the range [0, 2^64)
    DECLARE @Hash NUMERIC(38,0) = @Seed;

    -- Length in UTF-16 code units; DATALENGTH (unlike LEN) keeps trailing spaces
    DECLARE @Len INT = DATALENGTH(@String) / 2;
    DECLARE @Pos INT = 0;
    DECLARE @Value BIGINT;

    WHILE @Pos < @Len
    BEGIN
        SET @Pos += 1;

        -- Force a binary, non-supplementary-character collation so SUBSTRING and
        -- UNICODE operate on single UTF-16 code units. Under an _SC database
        -- collation they would work on whole code points and fall out of step
        -- with @Len (and with the C# reference implementation).
        SET @Value = UNICODE(SUBSTRING(@String COLLATE Latin1_General_BIN2, @Pos, 1));

        SET @Hash = (@Hash + @Value) % @TwoPower64;
        SET @Hash = (@Hash * @Multiplier) % @TwoPower64;
    END;

    -- Format and return as string
    RETURN CAST(@Hash AS VARCHAR(20));
END
BIGINT
版本:
CREATE FUNCTION KnuthHashUnicode2(@String NVARCHAR(MAX))
RETURNS VARCHAR(20)
AS
BEGIN
    -- Purpose: 64-bit multiplicative ("Knuth") hash of @String, computed with BIGINT
    --          arithmetic on three bit slices so no intermediate exceeds 63 bits.
    --          Adaptation of the C# algorithm posted at https://stackoverflow.com/a/9545731
    -- Param:   @String - text to hash; every UTF-16 code unit is folded in, in order.
    -- Returns: the unsigned 64-bit hash as a decimal string (at most 20 digits).
    --          For NULL input the loop is skipped and the seed value is returned.
    -- WARNING: This hash is **not** cryptographically secure.
    -- NOTE: Bit shifts are written as division/multiplication by powers of two
    --       because the << and >> operators only exist from SQL Server 2022 on.
    --       Every shifted operand is non-negative, so integer division by 2^k
    --       is exactly a right shift by k bits.

    -- Key values are conveniently < 2^63, so they fit in a signed BIGINT.
    DECLARE @Seed BIGINT = 3074457345618258791;
    DECLARE @Mult BIGINT = 3074457345618258799;

    -- We will be doing arithmetic in three slices: 2 x 24-bit and 1 x 16-bit
    DECLARE @Mask24 BIGINT = 0xFFFFFF;          -- 24 bits (Lo and Md parts)
    DECLARE @Mask16 BIGINT = 0xFFFF;            -- 16 bits (Hi part)
    DECLARE @Two24  BIGINT = 16777216;          -- 2^24
    DECLARE @Two48  BIGINT = 281474976710656;   -- 2^48

    DECLARE @MultLo BIGINT = @Mult & @Mask24;
    DECLARE @MultMd BIGINT = (@Mult / @Two24) & @Mask24;
    DECLARE @MultHi BIGINT = @Mult / @Two48;

    -- Initialize the running hash slices from the seed
    DECLARE @HashLo BIGINT = @Seed & @Mask24;
    DECLARE @HashMd BIGINT = (@Seed / @Two24) & @Mask24;
    DECLARE @HashHi BIGINT = @Seed / @Two48;

    -- Length in UTF-16 code units; DATALENGTH (unlike LEN) keeps trailing spaces
    DECLARE @Len INT = DATALENGTH(@String) / 2;
    DECLARE @Pos INT = 0;

    -- Loop work variables
    DECLARE @Value  BIGINT;
    DECLARE @SumLo  BIGINT, @SumMd  BIGINT, @SumHi  BIGINT;
    DECLARE @ProdLo BIGINT, @ProdMd BIGINT, @ProdHi BIGINT;

    WHILE @Pos < @Len
    BEGIN
        SET @Pos += 1;

        -- Force a binary, non-supplementary-character collation so SUBSTRING and
        -- UNICODE operate on single UTF-16 code units. Under an _SC database
        -- collation they would work on whole code points and fall out of step
        -- with @Len (and with the C# reference implementation).
        SET @Value = UNICODE(SUBSTRING(@String COLLATE Latin1_General_BIN2, @Pos, 1));

        -- Add with carry
        SET @SumLo = @HashLo + @Value;
        SET @SumMd = @HashMd + (@SumLo / @Two24);
        SET @SumHi = @HashHi + (@SumMd / @Two24);
        SET @HashLo = @SumLo & @Mask24;
        SET @HashMd = @SumMd & @Mask24;
        SET @HashHi = @SumHi & @Mask16;

        -- Cross-multiply with carry
        -- (No need to calculate product components > 64 bits)
        SET @ProdLo = (@HashLo * @MultLo);
        SET @ProdMd = (@HashMd * @MultLo)
                    + (@HashLo * @MultMd)
                    + (@ProdLo / @Two24);
        SET @ProdHi = (@HashHi * @MultLo)
                    + (@HashMd * @MultMd)
                    + (@HashLo * @MultHi)
                    + (@ProdMd / @Two24);
        SET @HashLo = @ProdLo & @Mask24;
        SET @HashMd = @ProdMd & @Mask24;
        SET @HashHi = @ProdHi & @Mask16;
    END;

    -- Combine slices in NUMERIC: @HashHi * 2^48 can reach 2^64 - 2^48, which would
    -- overflow a signed BIGINT. Doing it in NUMERIC yields the unsigned value directly.
    DECLARE @ResultNum NUMERIC(20,0) =
          CAST(@HashHi AS NUMERIC(20,0)) * @Two48
        + CAST(@HashMd AS NUMERIC(20,0)) * @Two24
        + @HashLo;

    -- Format and return as string
    RETURN CAST(@ResultNum AS VARCHAR(20));
END
结果示例:
字符串 | 结果1 | 结果2 |
---|---|---|
(空字符串) | 3074457345618258791 | 3074457345618258791 |
(单个空格) | 16397105843297422473 | 16397105843297422473 |
世界你好 | 5485109609273439365 | 5485109609273439365 |
Hello World(带尾随空格) | 10073154357816199819 | 10073154357816199819 |
αβγδ абвг你好世界∫∬∭∮🅐🅑🅒🅓 | 5168594500780613104 | 5168594500780613104 |
敏捷的棕色狐狸跳过了懒狗。 | 5118964073116902044 | 5118964073116902044 |
请参阅此 db<>fiddle 查看演示。