I want to use a byte array as a lookup key in a concurentDictionary. Currently I solve this by using a custom EqualityComparer<byte[]>.
This works fine, but I do realize that my hashcode generator will generate a lot of overlaps where things end up in the same hash bucket.
public class ByteArrayEqualityComparer : EqualityComparer<byte[]> { public override bool Equals(byte[] x, byte[] y) { //fast buffer compare return UnsafeCompare(x, y); } public override int GetHashCode(byte[] obj) { int hash = 0; for (int i = 0; i < obj.Length; i += 2) { hash += obj[i]; //xor? shift? black magic? } return hash; } } What would be a good formula to create a relatively fast hash from the byte array?
My idea is that I can calculate the hashcode by skipping every x bytes for speed. As the final comparison is still done over the full data set, it seems pointless to compare all bytes multiple times.
I imagine that some xor magic and shifting on the hash var would make things better.
This is extremely performance critical, so any shortcut that can be used is welcome too.
[edit] I ended up using this solution. I use a struct to wrap the byte array so that I can use a cached hashcode for it instead of calculating it for each comparison. This resulted in a very nice performance gain.
public struct ByteArrayKey { public readonly byte[] Bytes; private readonly int _hashCode; public override bool Equals(object obj) { var other = (ByteArrayKey) obj; return Compare(Bytes, other.Bytes); } public override int GetHashCode() { return _hashCode; } private static int GetHashCode([NotNull] byte[] bytes) { unchecked { var hash = 17; for (var i = 0; i < bytes.Length; i++) { hash = hash*23 + bytes[i]; } return hash; } } public ByteArrayKey(byte[] bytes) { Bytes = bytes; _hashCode = GetHashCode(bytes); } public static ByteArrayKey Create(byte[] bytes) { return new ByteArrayKey(bytes); } public static unsafe bool Compare(byte[] a1, byte[] a2) { if (a1 == null || a2 == null || a1.Length != a2.Length) return false; fixed (byte* p1 = a1, p2 = a2) { byte* x1 = p1, x2 = p2; var l = a1.Length; for (var i = 0; i < l/8; i++, x1 += 8, x2 += 8) if (*(long*) x1 != *(long*) x2) return false; if ((l & 4) != 0) { if (*(int*) x1 != *(int*) x2) return false; x1 += 4; x2 += 4; } if ((l & 2) != 0) { if (*(short*) x1 != *(short*) x2) return false; x1 += 2; x2 += 2; } if ((l & 1) != 0) if (*x1 != *x2) return false; return true; } } }