1 /* 2 This file is part of Sambamba. 3 Copyright (C) 2017 Pjotr Prins <pjotr.prins@thebird.nl> 4 5 Sambamba is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 2 of the License, 8 or (at your option) any later version. 9 10 Sambamba is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307 USA 19 20 */ 21 22 module bio.std.experimental.hts.hashing; 23 24 import std.conv; 25 import std.stdio; 26 27 pragma(inline): 28 pure nothrow @nogc ushort get16bits(char *p) 29 { 30 ushort p0 = p[0]; 31 ushort p1 = p[1]; 32 // return p[1]*256+p[0]; 33 return cast(ushort)(p1*256+p0); 34 } 35 36 /// Paul Hsieh's fast hash (LGPL license), see http://www.azillionmonkeys.com/qed/hash.html 37 pure nothrow @nogc uint SuperFastHash(string str, uint hashinc = 0) { 38 auto data = cast(char*)str.ptr; 39 auto len = cast(uint)str.length; 40 41 uint hash = (hashinc > 0 ? hashinc : 0); 42 43 if (len == 0) return 0; 44 45 int rem = len & 3; 46 len >>= 2; 47 48 /* Main loop */ 49 for (;len > 0; len--) { 50 hash += get16bits(data); 51 uint tmp = (get16bits(data+2) << 11) ^ hash; 52 hash = (hash << 16) ^ tmp; 53 data += 2*ushort.sizeof; 54 hash += hash >> 11; 55 } 56 57 /* Handle end cases */ 58 switch (rem) { 59 case 3: hash += get16bits(data); 60 hash ^= hash << 16; 61 hash ^= (cast(ushort)data[ushort.sizeof]) << 18; 62 hash += hash >> 11; 63 break; 64 case 2: hash += get16bits(data); 65 hash ^= hash << 11; 66 hash += hash >> 17; 67 break; 68 case 1: hash += cast(ushort)*data; 69 hash ^= hash << 10; 70 hash += hash >> 1; 71 break; 72 default: 73 break; 74 } 75 76 /* Force "avalanching" of final 127 bits */ 77 hash ^= hash << 3; 78 hash += hash >> 5; 79 hash ^= hash << 4; 80 hash += hash >> 17; 81 hash ^= hash << 25; 82 hash += hash >> 6; 83 84 return hash; 85 } 86 87 88 unittest { 89 // Make sure it behaves the same as the original 90 auto test = get16bits(cast(char *)("xy".ptr)); 91 assert(test == 31096, to!string(test)); 92 auto test2 = get16bits(cast(char *)("xyz".ptr)); 93 assert(test2 == 31096, to!string(test2)); 94 assert(get16bits(cast(char *)"xyz".ptr) == 31096); 95 assert(SuperFastHash("*")==1029965590,to!string(SuperFastHash("*"))); 96 assert(SuperFastHash("hst")==1867544282,to!string(SuperFastHash("hst"))); 97 assert(SuperFastHash("hstiaashccaht")==2173265029,to!string(SuperFastHash("hstiaashccaht"))); 98 assert(SuperFastHash("Pjotr")==2808102289,to!string(SuperFastHash("Pjotr"))); 99 }