1

I want to implement an MD5 hashing function in a C project, and I want to make my own, because one thing I am afraid of is using someone else's code (mainly because I have trouble understanding such codes). So, I headed straight to the Wiki page for the pseudocode: http://en.wikipedia.org/wiki/MD5 I decided to leave the padding and breaking into 512 bit chunks stuff for later and just start with an MD5 hash of an empty string. Properly padded, it should (I think) look like this:

unsigned int message[16] = {0x80000000, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000 

And here is how I reproduced Wiki's main loop (the one that deals with just one 512-bit chunk) in C:

unsigned int a0, b0, c0, d0, A, B, C, D, i, F, g, bufD; //empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words unsigned int message[16] = {0x80000000, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000 unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21}; unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}; a0 = 0x67452301; b0 = 0xefcdab89; c0 = 0x98badcfe; d0 = 0x10325476; //gotta put this into a loop for each 512-bit chunk A = a0; B = b0; C = c0; D = d0; for (i=0; i<64; i++){ if (i < 16){ F = (B & C) | (~B & D); g = i; } else if (i >= 16 && i < 32){ F = (D & B) | (~D & C); g = (5*i + 1) % 16; } else if (i >= 32 && i < 48){ F = B ^ C ^ D; g = (3*i + 5) % 16; } else if (i >= 48){ F = C ^ (B | ~D); g = (7*i) % 16; } bufD = D; D = C; C = B; B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]); A = bufD; } a0 += A; b0 += B; c0 += C; d0 += D; //end future loop printf ("a0=%u, b0=%u, c0=%u, d0=%u", a0, b0, c0, d0); 

The leftRotate function, also modified from the pseudocode on Wikipedia:

unsigned int leftRotate(unsigned int x, int n){ return ((x) << n) | ((x) >> (32 - n)); } 

This outputs the following:

a0=578518856, b0=3524428790, c0=1003076545, d0=1531243034 

Converting these decimal values to hex, I get the following:

a0 = 578518856 -> 227B7F48 b0 = 3524428790 -> D21283F6 c0 = 1003076545 -> 3BC9BBC1 d0 = 1531243034 -> 5B44EA1A 

Which isn't even a bit similar to the actual MD5 digest of an empty string (which is d41d8cd98f00b204e9800998ecf8427e). So, my question is, where am I going wrong?

9
  • Show your leftRotate function. Commented Jun 29, 2014 at 20:31
  • 2
    If you're doing this on a little-endian system, and the first bit of the message is 1, shouldn't message[0] be 0x00000080 rather than 0x80000000 ? Commented Jun 29, 2014 at 20:34
  • 1
    @MarkPlotnick Yep, that's it. And the bytes of the answer need to be reordered as well if they're accessed as 4-byte unsigned ints. Best to output them as a sequence of unsigned chars. Commented Jun 29, 2014 at 20:36
  • @Mark Plotnick: perhaps you meant 0x00000008, not 0x00000080? and how would I go about reordering the bytes of the answer, will swapping the hex values in the end suffice? Commented Jun 29, 2014 at 20:39
  • 1
    Nope, he meant 0x00000080. It's the bytes (not the nybbles!) that are reversed. Commented Jun 29, 2014 at 20:40

2 Answers 2

5
#include <stdio.h> //empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words unsigned int message[16] = {0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}; unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21}; unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}; unsigned leftRotate(unsigned x, int n) { return (x << n) | (x >> (32 - n)); } void printReverseEndian(unsigned n) { printf("%02x%02x%02x%02x", n & 0xff, (n >> 8) & 0xff, (n >> 16) & 0xff, n >> 24); } int main() { unsigned int a0, b0, c0, d0, A, B, C, D, i, F, g, bufD; a0 = 0x67452301; b0 = 0xefcdab89; c0 = 0x98badcfe; d0 = 0x10325476; A = a0; B = b0; C = c0; D = d0; for (i=0; i<64; i++){ if (i < 16){ F = (B & C) | (~B & D); g = i; } else if (i < 32){ F = (D & B) | (~D & C); g = (5*i + 1) % 16; } else if (i < 48){ F = B ^ C ^ D; g = (3*i + 5) % 16; } else { F = C ^ (B | ~D); g = (7*i) % 16; } bufD = D; D = C; C = B; B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]); A = bufD; } a0 += A; b0 += B; c0 += C; d0 += D; printReverseEndian(a0); printReverseEndian(b0); printReverseEndian(c0); printReverseEndian(d0); return 0; } 
Sign up to request clarification or add additional context in comments.

3 Comments

@Mints97 How is there no main??? And it doesn't need prototypes since the functions are defined before main.
sorry, sorry, I am always that slow at night =) I edited my original comment =)
Wait so how do I input and get the output?
2

Might be a problem of target architecture (endianness, size of types, ...)
My answer comes from bits of http://bytes.com/topic/c/answers/855645-simple-md5-hash-different-output-different-os

Edit:
The problem is because the bit of padding you added isn't on the correct byte considering little-endian storage in memory. Moreover, you must print the resulting md5 hash as it is stored in memory (in LE).

#include <stdio.h> unsigned int leftRotate(unsigned int x, unsigned int n){ return (x << n) | (x >> (32 - n)); } int main(){ unsigned int a0, b0, c0, d0, A, B, C, D, i, F, g, bufD; //empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words unsigned int message[16] = {0x00000080, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000... 0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000 unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21}; unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}; a0 = 0x67452301; b0 = 0xefcdab89; c0 = 0x98badcfe; d0 = 0x10325476; //gotta put this into a loop for each 512-bit chunk A = a0; B = b0; C = c0; D = d0; for (i=0; i<64; i++){ if (i < 16){ F = (B & C) | (~B & D); g = i; } else if (i >= 16 && i < 32){ F = (D & B) | (~D & C); g = (5*i + 1) % 16; } else if (i >= 32 && i < 48){ F = B ^ C ^ D; g = (3*i + 5) % 16; } else if (i >= 48){ F = C ^ (B | ~D); g = (7*i) % 16; } bufD = D; D = C; C = B; B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]); A = bufD; } a0 += A; b0 += B; c0 += C; d0 += D; //end future loop printf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", a0&0xff, (a0>>8)&0xff, (a0>>16)&0xff, (a0>>24)&0xff, b0&0xff, (b0>>8)&0xff, (b0>>16)&0xff, (b0>>24)&0xff, c0&0xff, (c0>>8)&0xff, (c0>>16)&0xff, (c0>>24)&0xff, d0&0xff, (d0>>8)&0xff, (d0>>16)&0xff, (d0>>24)&0xff); } 

Edit2:
Ahhhh, too late :/

2 Comments

endianness is just the matter of the order of bytes in the end, but, however I rearrange the bytes of the hexadecimal strings, I still don't get the correct digest. And I've checked sizeof(unsigned int), and it is 4 on my OS (Windows 8 x64) as it should be.
Windows implies x86 implies little-endian, so it's probably not that.