1 module bio.core.kmer; 2 3 import bio.core.base; 4 import std.range; 5 6 /// Represents k-mer of ACGT bases of length no more than 32. 7 struct KMer(uint K) 8 if (K <= 32) 9 { 10 private ulong _id; 11 12 static Base5 code2base(int code) { 13 return Base5("ACGT"[code]); 14 } 15 16 static int char2code(char base) { 17 switch (base) { 18 case 'A': return 0; 19 case 'C': return 1; 20 case 'G': return 2; 21 case 'T': return 3; 22 default: return -1; 23 } 24 } 25 26 /// Unique ID 27 ulong id() @property const { 28 return _id; 29 } 30 31 /// Construct by ID 32 this(S)(S id) 33 if (is(S == ulong)) 34 { 35 _id = id; 36 } 37 38 /// Construct from sequence. Takes bases from the provided sequence 39 /// until K symbols 'A/C/G/T' are found. That is, 'N' and other ambiguous 40 /// bases are skipped. 41 /// 42 /// If sequence does not contain at least K bases 'A/C/G/T', the result of 43 /// operation is undefined. 44 this(S)(S sequence) 45 if (isInputRange!S) 46 { 47 size_t i = 0; 48 foreach (nuc; sequence) { 49 _id <<= 2; 50 ++i; 51 switch (cast(char)nuc) { 52 case 'A': 53 break; 54 case 'C': 55 _id += 1; 56 break; 57 case 'G': 58 _id += 2; 59 break; 60 case 'T': 61 _id += 3; 62 break; 63 default: 64 _id >>= 2; 65 --i; 66 break; 67 } 68 69 if (i == K) 70 break; 71 } 72 } 73 74 struct KMerSequence { 75 this(ulong number) { 76 _n = number; 77 } 78 79 private ulong _n; 80 private size_t _len = K; 81 82 bool empty() @property const { return _len == 0; } 83 void popFront() { --_len; } 84 void popBack() { --_len; _n >>= 2; } 85 86 Base5 opIndex(size_t i) const { 87 return code2base((_n >> (2 * (_len - i - 1))) & 3); 88 } 89 90 size_t length() @property const { return _len; } 91 Base5 front() @property const { return opIndex(0); } 92 Base5 back() @property const { return opIndex(_len - 1); } 93 KMerSequence save() @property const { 94 KMerSequence _seq = void; 95 _seq._n = _n; 96 _seq._len = _len; 97 return _seq; 98 } 99 } 100 101 /// Sequence corresponding to the k-mer 102 KMerSequence sequence() @property const { 103 return KMerSequence(_id); 104 } 105 } 106 107 unittest { 108 import std.algorithm; 109 auto kmer = KMer!10("AACGTACGTG"); 110 assert(equal(kmer.sequence, "AACGTACGTG")); 111 112 assert(KMer!5(KMer!5(0b1011001001UL).sequence).id == 0b1011001001UL); 113 }