我正在尝试用 C 实现 MD5 哈希函数。我已经阅读了 RFC,并完成了以下操作,并试图找出这里出了什么问题。我知道我可能应该使用像 OpenSSL 这样的库,但我只是想通过阅读 RFC 来学习如何自己做到这一点。如果我能得到一些帮助,那就真的很有帮助了。
#include <malloc.h>
#include <stdint.h>
#include <string.h>
void* MD5_new(const void*, uint64_t);
void MD5_free(void*);
static const uint8_t S[4][4] = {
{ 7, 12, 17, 22},
{ 5, 9, 14, 20},
{ 4, 11, 16, 23},
{ 6, 10, 15, 21},
};
const static uint32_t T[64] = {
0xd76aa478,
0xe8c7b756,
0x242070db,
0xc1bdceee,
0xf57c0faf,
0x4787c62a,
0xa8304613,
0xfd469501,
0x698098d8,
0x8b44f7af,
0xffff5bb1,
0x895cd7be,
0x6b901122,
0xfd987193,
0xa679438e,
0x49b40821,
0xf61e2562,
0xc040b340,
0x265e5a51,
0xe9b6c7aa,
0xd62f105d,
0x02441453,
0xd8a1e681,
0xe7d3fbc8,
0x21e1cde6,
0xc33707d6,
0xf4d50d87,
0x455a14ed,
0xa9e3e905,
0xfcefa3f8,
0x676f02d9,
0x8d2a4c8a,
0xfffa3942,
0x8771f681,
0x6d9d6122,
0xfde5380c,
0xa4beea44,
0x4bdecfa9,
0xf6bb4b60,
0xbebfbc70,
0x289b7ec6,
0xeaa127fa,
0xd4ef3085,
0x04881d05,
0xd9d4d039,
0xe6db99e5,
0x1fa27cf8,
0xc4ac5665,
0xf4292244,
0x432aff97,
0xab9423a7,
0xfc93a039,
0x655b59c3,
0x8f0ccc92,
0xffeff47d,
0x85845dd1,
0x6fa87e4f,
0xfe2ce6e0,
0xa3014314,
0x4e0811a1,
0xf7537e82,
0xbd3af235,
0x2ad7d2bb,
0xeb86d391,
};
static inline uint32_t rotatel(uint32_t x, uint32_t shift) {
// x << shift first left shifts the thing by shift units
//
// x >> (32 - shift) right shifts the thing by (32 - shift) units
//
// Then bitwise OR gives the result which we want since the new bits after
// shift are zero.
return (x << shift) | (x >> (32 - shift));
}
static inline uint32_t F(uint32_t X, uint32_t Y, uint32_t Z) {
return (X & Y) | ((~X) & Z);
}
static inline uint32_t G(uint32_t X, uint32_t Y, uint32_t Z) {
return (X & Z) | (Y & (~Z));
}
static inline uint32_t H(uint32_t X, uint32_t Y, uint32_t Z) {
return X ^ Y ^ Z;
}
static inline uint32_t I(uint32_t X, uint32_t Y, uint32_t Z) {
return Y ^ (X | (~Z));
}
// https://www.rfc-editor.org/rfc/rfc1321
// although the RFC specifies a lot of these things in bits, we are doing it in
// bytes
void *MD5_new(const void *data, const uint64_t size) {
// Padding is always supposed to be done. (RFC section 3.1 step 1)
uint64_t padding_size;
if (size % 64 < 56) {
// We are missing a few bytes to get a 64n + 56 bytes form
padding_size = 56 - (size % 64);
} else {
// We are having 64n + (>= 56) form, fill in the current chunk, and have
// another chunk with exactly 56 bytes.
padding_size = (64 - (size % 64)) + 56;
}
// this is supposed to be able to be broken into chunks of 512-bits or 64
// bytes
const uint64_t padded_size = size + padding_size + 8;
// Thee padded message must be of form 64n + 56 bytes.
// Last 8 bytes (64 bits) contains the original message size in uint64 format
void *M = malloc(padded_size);
memcpy(M, data, size);
// Perform the actual padding.
void *m = M + size;
// The first bit is always supposed to be 1 and rest are to be filled with
// zeroes
*(uint8_t *)m = 0x80;
m += sizeof(uint8_t);
// We already written our first byte by the time we are reaching here.
for (padding_size--; padding_size != 0; padding_size--) {
// Actually pad it
*(uint8_t *)m = 0x00;
// Advance the pointer
m += sizeof(uint8_t);
}
// Now append the original message size.
// Bits are appended as 2 32-bit words, with low order word first
// Also RFC wants size in bits, not bytes
const uint64_t size_in_bits = size * 8;
*(uint64_t *)m = size_in_bits;
// the md5sum is a 128-bit output, or 16 bytes
void *md5sum = malloc(16);
// Break into 4 32-bit words as mentioned in the spec
uint32_t *A = md5sum + sizeof(uint32_t) * 0;
uint32_t *B = md5sum + sizeof(uint32_t) * 1;
uint32_t *C = md5sum + sizeof(uint32_t) * 2;
uint32_t *D = md5sum + sizeof(uint32_t) * 3;
// Initialize the word buffers
// RFC specifies the hexadecimal in an antique fashion by mentioning the
// low-order bits first after breaking it into 8-bit words.
//
// A: 01234567
// B: 89abcdef
// C: fedcba98
// D: 76543210
*A = 0x67452301;
*B = 0xefcdab89;
*C = 0x98badcfe;
*D = 0x10325476;
// Represents the current chunk
void *X = M;
uint64_t k;
// <<< is left round shift operator
//
// Some other notes, original RFC does not give the relation between i and k,
// but can be easily guessed looking at the pattern in the values.
// Morever,Wikipedia also mentions the relation in it's MD5 article under the
// pseudocode section (https://en.wikipedia.org/wiki/MD5#Pseudocode)
for (uint64_t num_chunks = padded_size / 64, current_chunk = 0;
current_chunk < num_chunks; current_chunk++) {
uint32_t AA = *A;
uint32_t BB = *B;
uint32_t CC = *C;
uint32_t DD = *D;
for (int i = 0; i < 64; i++) {
// Round 1
// [abcd k s i] denotes:
// a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s),
// where,
// s = S[i / 16][i % 4]
// k = i
if (i < 16) {
k = i;
*A = *B + rotatel(((*A + F(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
S[i / 16][i % 4]);
// print k s i
}
// Round 2
// [abcd k s i] denotes:
// a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s),
// where,
// s = S[i / 16][i % 4]
// k = ((5 * i) + 1) % 16
else if (i < 32) {
k = ((5 * i) + 1) % 16;
*A = *B + rotatel(((*A + G(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
S[i / 16][i % 4]);
}
// Round 3
// [abcd k s i] denotes:
// a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s),
// where,
// s = S[i / 16][i % 4]
// k = ((3 * i) + 5) % 16
else if (i < 48) {
k = ((3 * i) + 5) % 16;
*A = *B + rotatel(((*A + H(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
S[i / 16][i % 4]);
}
// Round 4
// [abcd k s i] denotes:
// a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s),
// where,
// s = S[i / 16][i % 4]
// k = (7 * i) % 16
else /* if ( i < 64) */ {
k = (7 * i) % 16;
*A = *B + rotatel(((*A + I(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
S[i / 16][i % 4]);
}
// Rotate the variables;
uint32_t *temp = A;
A = D;
D = C;
C = B;
B = temp;
}
*A += AA;
*B += BB;
*C += CC;
*D += DD;
X += 64;
}
free(M);
return md5sum;
}
void MD5_free(void *data) { free(data); }
int main() {
void* hash = MD5_new("hello", 5);
printf("%08x%08x%08x%08x\n", ((uint32_t*)hash)[0], ((uint32_t*)hash)[1], ((uint32_t*)hash)[2], ((uint32_t*)hash)[3]);
MD5_free(hash);
}
以下生成“b975c10ca8b6f1c0e299c33161267769”作为输出,而不是预期的“60b725f10c9c85c70d97880dfe8191b3”。预先感谢!
我在我的实现中发现了错误。 RFC 还提到消息摘要应该首先具有低位,因此通过执行以下操作来反转每个 A、B、C 和 D,就像魅力一样:
还要感谢@topaco 建议查看每个步骤的值并将其与其他输出进行比较。 https://rosettacode.org/wiki/MD5/Implementation_Debug帮助了我。我发现我的实现根本没有错误,只需要一些字节反转。
此外,在通过 GNU coreutils'
md5hash
获取哈希值时,我没有将 -n
标志传递给 echo
,这也改变了哈希值,否则我可能很快就弄清楚了。谢谢大家!这就是我对任何碰巧来到这里的未来访客所做的事情。确保您自己完成问题后也回答自己的问题。
for (int i = 0; i < 4; i++) {
uint8_t tmp1 = ((uint8_t *)md5sum)[4 * i];
((uint8_t *)md5sum)[4 * i] = ((uint8_t *)md5sum)[4 * i + 3];
((uint8_t *)md5sum)[4 * i + 3] = tmp1;
uint8_t tmp2 = ((uint8_t *)md5sum)[4 * i + 1];
((uint8_t *)md5sum)[4 * i + 1] = ((uint8_t *)md5sum)[4 * i + 2];
((uint8_t *)md5sum)[4 * i + 2] = tmp2;
}
return md5sum;