1 /*
2     This file is part of Sambamba.
3     Copyright (C) 2017 Pjotr Prins <pjotr.prins@thebird.nl>
4 
5     Sambamba is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published
7     by the Free Software Foundation; either version 2 of the License,
8     or (at your option) any later version.
9 
10     Sambamba is distributed in the hope that it will be useful, but
11     WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18     02111-1307 USA
19 
20 */
21 
22 module bio.std.experimental.hts.hashing;
23 
24 import std.conv;
25 import std.stdio;
26 
27 pragma(inline):
28 pure nothrow @nogc ushort get16bits(char *p)
29 {
30   ushort p0 = p[0];
31   ushort p1 = p[1];
32   // return p[1]*256+p[0];
33   return cast(ushort)(p1*256+p0);
34 }
35 
36 /// Paul Hsieh's fast hash (LGPL license), see http://www.azillionmonkeys.com/qed/hash.html
37 pure nothrow @nogc uint SuperFastHash(string str, uint hashinc = 0) {
38   auto data = cast(char*)str.ptr;
39   auto len  = cast(uint)str.length;
40 
41   uint hash = (hashinc > 0 ? hashinc : 0);
42 
43   if (len == 0) return 0;
44 
45   int rem = len & 3;
46   len >>= 2;
47 
48   /* Main loop */
49   for (;len > 0; len--) {
50     hash  += get16bits(data);
51     uint tmp    = (get16bits(data+2) << 11) ^ hash;
52     hash   = (hash << 16) ^ tmp;
53     data  += 2*ushort.sizeof;
54     hash  += hash >> 11;
55   }
56 
57   /* Handle end cases */
58   switch (rem) {
59   case 3: hash += get16bits(data);
60     hash ^= hash << 16;
61     hash ^= (cast(ushort)data[ushort.sizeof]) << 18;
62     hash += hash >> 11;
63     break;
64   case 2: hash += get16bits(data);
65     hash ^= hash << 11;
66     hash += hash >> 17;
67     break;
68   case 1: hash += cast(ushort)*data;
69     hash ^= hash << 10;
70     hash += hash >> 1;
71     break;
72   default:
73     break;
74   }
75 
76   /* Force "avalanching" of final 127 bits */
77   hash ^= hash << 3;
78   hash += hash >> 5;
79   hash ^= hash << 4;
80   hash += hash >> 17;
81   hash ^= hash << 25;
82   hash += hash >> 6;
83 
84   return hash;
85 }
86 
87 
88 unittest {
89   // Make sure it behaves the same as the original
90   auto test = get16bits(cast(char *)("xy".ptr));
91   assert(test == 31096, to!string(test));
92   auto test2 = get16bits(cast(char *)("xyz".ptr));
93   assert(test2 == 31096, to!string(test2));
94   assert(get16bits(cast(char *)"xyz".ptr) == 31096);
95   assert(SuperFastHash("*")==1029965590,to!string(SuperFastHash("*")));
96   assert(SuperFastHash("hst")==1867544282,to!string(SuperFastHash("hst")));
97   assert(SuperFastHash("hstiaashccaht")==2173265029,to!string(SuperFastHash("hstiaashccaht")));
98   assert(SuperFastHash("Pjotr")==2808102289,to!string(SuperFastHash("Pjotr")));
99 }