"foo bar" and "bar foo" hash to the same value right? Implement it in such a way that the ascii value and its position in the string are used to compute the hash, I naively imagine this will reduce collision significantly.
int hash(char* s) { int hash = 0; int pos = 0; while(*s) { pos++; hash += (*s * pos); s++; } return hash; }
Try this and see if it helps. I dont have much theoretical knowledge behind this answer.
EDIT* as mentioned below, you would probably want hash to be an unsigned int. I tested this out on codechef.com, here is the source and results:
#include <stdio.h> unsigned int hash(char* s); unsigned int hash2(char* s); int main(void) { unsigned int temp1 = hash("foo bar"); unsigned int temp2 = hash("bar foo"); printf("temp1 is %d and temp2 is %d\n",temp1, temp2); temp1 = hash2("foo bar"); temp2 = hash2("bar foo"); printf("temp1 is %d and temp2 is %d\n",temp1, temp2); return 0; } unsigned int hash(char* s) { unsigned int hash = 0; while(*s) { hash = hash + *s; s++; } return hash; } unsigned int hash2(char* s) { unsigned int hash = 0; int pos = 0; while(*s) { pos++; hash += (*s * pos); s++; } return hash; }
With output:
temp1 is 665 and temp2 is 665
temp1 is 2655 and temp2 is 2715
hash("god") == hash("dog")