下面的MD5实现有什么问题吗?

问题描述 投票:0回答:1

我正在尝试用 C 实现 MD5 哈希函数。我已经阅读了 RFC,并完成了以下操作,并试图找出这里出了什么问题。我知道我可能应该使用像 OpenSSL 这样的库,但我只是想通过阅读 RFC 来学习如何自己做到这一点。如果我能得到一些帮助,那就真的很有帮助了。

#include <malloc.h>
#include <stdint.h>
#include <string.h>

void* MD5_new(const void*, uint64_t);
void MD5_free(void*);

static const uint8_t S[4][4] = {
    { 7, 12, 17, 22},
    { 5,  9, 14, 20},
    { 4, 11, 16, 23},
    { 6, 10, 15, 21},
};

const static uint32_t T[64] = {
  0xd76aa478,
  0xe8c7b756,
  0x242070db,
  0xc1bdceee,
  0xf57c0faf,
  0x4787c62a,
  0xa8304613,
  0xfd469501,
  0x698098d8,
  0x8b44f7af,
  0xffff5bb1,
  0x895cd7be,
  0x6b901122,
  0xfd987193,
  0xa679438e,
  0x49b40821,
  0xf61e2562,
  0xc040b340,
  0x265e5a51,
  0xe9b6c7aa,
  0xd62f105d,
  0x02441453,
  0xd8a1e681,
  0xe7d3fbc8,
  0x21e1cde6,
  0xc33707d6,
  0xf4d50d87,
  0x455a14ed,
  0xa9e3e905,
  0xfcefa3f8,
  0x676f02d9,
  0x8d2a4c8a,
  0xfffa3942,
  0x8771f681,
  0x6d9d6122,
  0xfde5380c,
  0xa4beea44,
  0x4bdecfa9,
  0xf6bb4b60,
  0xbebfbc70,
  0x289b7ec6,
  0xeaa127fa,
  0xd4ef3085,
  0x04881d05,
  0xd9d4d039,
  0xe6db99e5,
  0x1fa27cf8,
  0xc4ac5665,
  0xf4292244,
  0x432aff97,
  0xab9423a7,
  0xfc93a039,
  0x655b59c3,
  0x8f0ccc92,
  0xffeff47d,
  0x85845dd1,
  0x6fa87e4f,
  0xfe2ce6e0,
  0xa3014314,
  0x4e0811a1,
  0xf7537e82,
  0xbd3af235,
  0x2ad7d2bb,
  0xeb86d391,
};

static inline uint32_t rotatel(uint32_t x, uint32_t shift) {
  // x << shift first left shifts the thing by shift units
  //
  // x >> (32 - shift) right shifts the thing by (32 - shift) units
  //
  // Then bitwise OR gives the result which we want since the new bits after
  // shift are zero.
  return (x << shift) | (x >> (32 - shift));
}

static inline uint32_t F(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & Y) | ((~X) & Z);
}

static inline uint32_t G(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & Z) | (Y & (~Z));
}

static inline uint32_t H(uint32_t X, uint32_t Y, uint32_t Z) {
  return X ^ Y ^ Z;
}

static inline uint32_t I(uint32_t X, uint32_t Y, uint32_t Z) {
  return Y ^ (X | (~Z));
}

// https://www.rfc-editor.org/rfc/rfc1321
// although the RFC specifies a lot of these things in bits, we are doing it in
// bytes
void *MD5_new(const void *data, const uint64_t size) {
  // Padding is always supposed to be done. (RFC section 3.1 step 1)
  uint64_t padding_size;
  if (size % 64 < 56) {
    // We are missing a few bytes to get a 64n + 56 bytes form
    padding_size = 56 - (size % 64);
  } else {
    // We are having 64n + (>= 56) form, fill in the current chunk, and have
    // another chunk with exactly 56 bytes.
    padding_size = (64 - (size % 64)) + 56;
  }
  // this is supposed to be able to be broken into chunks of 512-bits or 64
  // bytes
  const uint64_t padded_size = size + padding_size + 8;
  // Thee padded message must be of form 64n + 56 bytes.
  // Last 8 bytes (64 bits) contains the original message size in uint64 format
  void *M = malloc(padded_size);
  memcpy(M, data, size);
  // Perform the actual padding.
  void *m = M + size;
  // The first bit is always supposed to be 1 and rest are to be filled with
  // zeroes
  *(uint8_t *)m = 0x80;
  m += sizeof(uint8_t);
  // We already written our first byte by the time we are reaching here.
  for (padding_size--; padding_size != 0; padding_size--) {
    // Actually pad it
    *(uint8_t *)m = 0x00;
    // Advance the pointer
    m += sizeof(uint8_t);
  }
  // Now append the original message size.
  // Bits are appended as 2 32-bit words, with low order word first
  // Also RFC wants size in bits, not bytes
  const uint64_t size_in_bits = size * 8;
  *(uint64_t *)m = size_in_bits;

  // the md5sum is a 128-bit output, or 16 bytes
  void *md5sum = malloc(16);
  // Break into 4 32-bit words as mentioned in the spec
  uint32_t *A = md5sum + sizeof(uint32_t) * 0;
  uint32_t *B = md5sum + sizeof(uint32_t) * 1;
  uint32_t *C = md5sum + sizeof(uint32_t) * 2;
  uint32_t *D = md5sum + sizeof(uint32_t) * 3;

  // Initialize the word buffers
  // RFC specifies the hexadecimal in an antique fashion by mentioning the
  // low-order bits first after breaking it into 8-bit words.
  //
  // A: 01234567
  // B: 89abcdef
  // C: fedcba98
  // D: 76543210
  *A = 0x67452301;
  *B = 0xefcdab89;
  *C = 0x98badcfe;
  *D = 0x10325476;

  // Represents the current chunk
  void *X = M;
  uint64_t k;
  // <<< is left round shift operator
  //
  // Some other notes, original RFC does not give the relation between i and k,
  // but can be easily guessed looking at the pattern in the values.
  // Morever,Wikipedia also mentions the relation in it's MD5 article under the
  // pseudocode section (https://en.wikipedia.org/wiki/MD5#Pseudocode)
  for (uint64_t num_chunks = padded_size / 64, current_chunk = 0;
       current_chunk < num_chunks; current_chunk++) {
    uint32_t AA = *A;
    uint32_t BB = *B;
    uint32_t CC = *C;
    uint32_t DD = *D;
    for (int i = 0; i < 64; i++) {
      // Round 1
      // [abcd k s i] denotes:
      // a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s),
      // where,
      //        s = S[i / 16][i % 4]
      //        k = i
      if (i < 16) {
        k = i;
        *A = *B + rotatel(((*A + F(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
                          S[i / 16][i % 4]);
        // print k s i
      }
      // Round 2
      // [abcd k s i] denotes:
      // a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s),
      // where,
      //        s = S[i / 16][i % 4]
      //        k = ((5 * i) + 1) % 16
      else if (i < 32) {
        k = ((5 * i) + 1) % 16;
        *A = *B + rotatel(((*A + G(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
                          S[i / 16][i % 4]);
      }
      // Round 3
      // [abcd k s i] denotes:
      // a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s),
      // where,
      //        s = S[i / 16][i % 4]
      //        k = ((3 * i) + 5) % 16
      else if (i < 48) {
        k = ((3 * i) + 5) % 16;
        *A = *B + rotatel(((*A + H(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
                          S[i / 16][i % 4]);
      }
      // Round 4
      // [abcd k s i] denotes:
      // a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s),
      // where,
      //        s = S[i / 16][i % 4]
      //        k = (7 * i) % 16
      else /* if ( i < 64) */ {
        k = (7 * i) % 16;
        *A = *B + rotatel(((*A + I(*B, *C, *D)) + ((uint32_t *)X)[k] + T[i]),
                          S[i / 16][i % 4]);
      }
      // Rotate the variables;
      uint32_t *temp = A;
      A = D;
      D = C;
      C = B;
      B = temp;
    }
    *A += AA;
    *B += BB;
    *C += CC;
    *D += DD;
    X += 64;
  }
  free(M);
  return md5sum;
}

void MD5_free(void *data) { free(data); }

int main() {
  void* hash = MD5_new("hello", 5);
  printf("%08x%08x%08x%08x\n", ((uint32_t*)hash)[0], ((uint32_t*)hash)[1], ((uint32_t*)hash)[2], ((uint32_t*)hash)[3]);
  MD5_free(hash);
}

以下生成“b975c10ca8b6f1c0e299c33161267769”作为输出,而不是预期的“60b725f10c9c85c70d97880dfe8191b3”。预先感谢!

c cryptography
1个回答
0
投票

我在我的实现中发现了错误。 RFC 还提到消息摘要应该首先具有低位,因此通过执行以下操作来反转每个 A、B、C 和 D,就像魅力一样:

还要感谢@topaco 建议查看每个步骤的值并将其与其他输出进行比较。 https://rosettacode.org/wiki/MD5/Implementation_Debug帮助了我。我发现我的实现根本没有错误,只需要一些字节反转。

此外,在通过 GNU coreutils'

md5hash
获取哈希值时,我没有将
-n
标志传递给
echo
,这也改变了哈希值,否则我可能很快就弄清楚了。谢谢大家!这就是我对任何碰巧来到这里的未来访客所做的事情。确保您自己完成问题后也回答自己的问题。

  for (int i = 0; i < 4; i++) {
    uint8_t tmp1 = ((uint8_t *)md5sum)[4 * i];
    ((uint8_t *)md5sum)[4 * i] = ((uint8_t *)md5sum)[4 * i + 3];
    ((uint8_t *)md5sum)[4 * i + 3] = tmp1;
    uint8_t tmp2 = ((uint8_t *)md5sum)[4 * i + 1];
    ((uint8_t *)md5sum)[4 * i + 1] = ((uint8_t *)md5sum)[4 * i + 2];
    ((uint8_t *)md5sum)[4 * i + 2] = tmp2;
  }
 return md5sum;
© www.soinside.com 2019 - 2024. All rights reserved.