1 /*
2     This file is part of BioD.
3     Copyright (C) 2012    Artem Tarasov <lomereiter@gmail.com>
4 
5     Permission is hereby granted, free of charge, to any person obtaining a
6     copy of this software and associated documentation files (the "Software"),
7     to deal in the Software without restriction, including without limitation
8     the rights to use, copy, modify, merge, publish, distribute, sublicense,
9     and/or sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following conditions:
11     
12     The above copyright notice and this permission notice shall be included in
13     all copies or substantial portions of the Software.
14     
15     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21     DEALINGS IN THE SOFTWARE.
22 
23 */
24 module bio.core.base;
25 
26 import bio.core.tinymap;
27 import std.traits;
28 
/// Operations shared by Base5 and Base16.
///
/// The struct mixing this in must provide a '_code' member and a
/// '_code2char' table indexable by that code.
mixin template CommonBaseOperations() {
    /// Character representation of the stored code
    @property char asCharacter() const {
        return _code2char[_code];
    }

    /// Allows a base to be used wherever a char is expected
    alias asCharacter this;
}
37 
/// Base representation supporting full set of IUPAC codes.
///
/// Each base is stored as a 4-bit code (see the table next to _code2char).
/// Characters that are not recognized are stored as 'N' (code 15).
struct Base {
    mixin TinyMapInterface!16; 

    // Maps a byte value to the internal 4-bit code.
    // Letters are accepted in both cases, '=' maps to 0, the digits
    // '0'..'3' map to A, C, G, T, and everything else maps to 15 ('N').
    private enum ubyte[256] _char2code = [
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0,15,15,

                15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
                15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
                15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
                15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,

                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,

                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
                15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
            ];

    // Internal codes are bit masks: bit 0 = A, bit 1 = C, bit 2 = G, bit 3 = T.
    //
    // = 0000
    //
    // A 0001
    // C 0010
    // G 0100
    // T 1000
    //
    // W 1001 (A T) Weak
    // S 0110 (C G) Strong
    //
    // M 0011 (A C) aMino
    // K 1100 (G T) Keto
    // R 0101 (A G) puRine
    // Y 1010 (C T) pYrimidine
    //
    // B 1110 (not A)
    // D 1101 (not C)
    // H 1011 (not G)
    // V 0111 (not T)
    //
    // N 1111 (aNy base)
    private enum _code2char = "=ACMGRSVTWYHKDBN";

    // _complement_table[code] is 'code' with its four bits reversed,
    // which swaps A <-> T and C <-> G inside the mask.
    private enum ubyte[16] _complement_table = [0x0, 0x8, 0x4, 0xC, 
                                                     0x2, 0xA, 0x6, 0xE,
                                                     0x1, 0x9, 0x5, 0xD,
                                                     0x3, 0xB, 0x7, 0xF];
    /// Complementary base
    Base complement() @property const {
        // take the code, reverse the bits, and return the base
        return Base.fromInternalCode(_complement_table[_code]);
    }

    unittest {
        import std.ascii;

        // every code must round-trip through its character
        foreach (i, c; _code2char) {
            assert(_char2code[c] == i);
        }

        // every byte that is not decoded as 'N' must decode to its own
        // upper-case letter (or, for digits, to the matching ACGT entry)
        foreach (c; 0 .. 256) {
            auto c2 = _code2char[_char2code[c]];
            if (c2 != 'N') {
                if ('0' <= c && c <= '9') {
                    assert(c2 == "ACGT"[c - '0']);
                } else {
                    assert(c2 == toUpper(c));
                }
            }
        }
    }

    unittest {
        // Code points above 0xFF must not be truncated to their low byte:
        // U+0141 has low byte 0x41 ('A') but is not an IUPAC symbol.
        assert(Base(cast(dchar) 0x141).internal_code == 15);
        assert(Base(cast(dchar) 0x141) == 'N');

        // dchar values inside the table range behave like their char twins
        assert(Base(cast(dchar) 'w').internal_code == 9);
        assert(Base(cast(dchar) '=').internal_code == 0);
    }

    mixin CommonBaseOperations;
    /// Construct from IUPAC code.
    /// Unrecognized characters are converted to 'N'.
    this(char c) {
        _code = _char2code[cast(ubyte)c];
    }

    /// Construct from IUPAC code given as a dchar.
    /// Unrecognized characters, including every code point above 0xFF,
    /// are converted to 'N'.
    this(dchar c) {
        if (c > ubyte.max) {
            // outside the lookup table; a plain cast(ubyte) would silently
            // alias this code point onto an unrelated ASCII character
            _code = 15;
        } else {
            _code = _char2code[cast(ubyte)c];
        }
    }

    // Base5 internal codes 0..4 (A, C, G, T, N) expressed as 16-code masks.
    private enum ubyte[5] nt5_to_nt16 = [1, 2, 4, 8, 15];
    private static Base fromBase5(Base5 base) {
        Base b = void;
        b._code = nt5_to_nt16[base.internal_code];
        return b;
    }

    /// Conversion to Base5
    Base5 opCast(T)() const
        if (is(T == Base5)) 
    {
        return Base5.fromBase16(this);
    }

    /// Conversion to char or dchar (same value as asCharacter)
    T opCast(T)() const 
        if (is(Unqual!T == char) || is(Unqual!T == dchar))
    {
        return asCharacter;
    }
}
147 
unittest {
    // construction from a character literal, compared back as a character
    Base symbol = 'W';
    assert(symbol == 'W');

    // internal code 0 is rendered as '='
    symbol = Base.fromInternalCode(0);
    assert(symbol == '=');
}
155 
/// Base16 is another name for Base
alias Base16 = Base;
157 
/// Base representation supporting only 'A', 'C', 'G', 'T', and 'N'
/// (internal codes are 0, 1, 2, 3, and 4 correspondingly)
struct Base5 {
    mixin TinyMapInterface!5;

    // Maps a byte value to the internal code: 'A'/'a' -> 0, 'C'/'c' -> 1,
    // 'G'/'g' -> 2, 'T'/'t' -> 3, everything else -> 4 ('N').
    private enum ubyte[256] _char2code = [
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,

                4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,

                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,

                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
                4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
                ];

    private enum _code2char = "ACGTN";

    // Indexed by the Base16 internal code; only the single-base masks
    // 1, 2, 4, 8 map to A, C, G, T, every other code collapses to 4 ('N').
    private enum ubyte[16] nt16_to_nt5 = [4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4];

    mixin CommonBaseOperations;

    /// Complementary base
    Base5 complement() @property const {
        // codes 0..3 mirror around 1.5 (A <-> T, C <-> G); 'N' stays 'N'
        return Base5.fromInternalCode(cast(ubyte)(_code == 4 ? 4 : (3 - _code)));
    }

    /// Construct base from one of "acgtACGT" symbols.
    /// Every other character is converted to 'N'
    this(char c) {
        _code = _char2code[cast(ubyte)c];
    }

    /// Construct base from one of "acgtACGT" symbols given as a dchar.
    /// Every other character, including every code point above 0xFF,
    /// is converted to 'N'
    this(dchar c) {
        if (c > ubyte.max) {
            // outside the lookup table; a plain cast(ubyte) would silently
            // alias this code point onto an unrelated ASCII character
            _code = 4;
        } else {
            _code = _char2code[cast(ubyte)c];
        }
    }

    unittest {
        // Code points above 0xFF must not be truncated to their low byte:
        // U+0141 has low byte 0x41 ('A') but is not one of "acgtACGT".
        assert(Base5(cast(dchar) 0x141).internal_code == 4);
        assert(Base5(cast(dchar) 0x141) == 'N');

        // dchar values inside the table range behave like their char twins
        assert(Base5(cast(dchar) 'g').internal_code == 2);
    }

    private static Base5 fromBase16(Base16 base) {
        Base5 b = void;
        b._code = nt16_to_nt5[base.internal_code];
        return b;
    }

    /// Conversion to Base16
    Base16 opCast(T)() const
        if(is(T == Base16)) 
    {
        return Base16.fromBase5(this);
    }

    /// Conversion to char or dchar (same value as asCharacter)
    T opCast(T)() const 
        if (is(Unqual!T == char) || is(Unqual!T == dchar))
    {
        return asCharacter;
    }
}
225 
unittest {
    // 'C' is stored with internal code 1
    auto nucleotide = Base5('C');
    assert(nucleotide.internal_code == 1);

    // internal code 3 is rendered as 'T'
    nucleotide = Base5.fromInternalCode(3);
    assert(nucleotide == 'T');

    // doesn't work with std.conv.to
    //
    //import std.conv;
    //assert(to!Base16(nucleotide).internal_code == 8);

    // explicit cast to Base16: 'T' has internal code 8 there
    auto widened = cast(Base16)nucleotide;
    assert(widened.internal_code == 8);
}
239 
/// Returns the complement of a base given as Base5, Base16, char, or dchar.
/// The result has the same type as the argument; characters are
/// complemented through Base16.
B complementBase(B)(B base) {
    static if (!is(Unqual!B == char) && !is(Unqual!B == dchar))
    {
        // base types carry their own complement property
        return base.complement;
    }
    else
    {
        // characters: decode as Base16, complement, convert back
        auto complemented = Base16(base).complement;
        return cast(B)complemented;
    }
}
249 
/// Converts a char or dchar to a base of type B (Base16 unless specified)
/// by calling B's constructor.
template charToBase(B=Base16)
{
    B charToBase(C)(C c)
        if (is(Unqual!C == dchar) || is(Unqual!C == char))
    {
        auto converted = B(c);
        return converted;
    }
}
259 
unittest {
    // plain characters
    assert(complementBase('G') == 'C');
    assert(complementBase('T') == 'A');

    // base types keep their own type
    assert(complementBase(Base16('C')) == Base16('G'));
    assert(complementBase(Base5('A')) == Base5('T'));

    // charToBase with an explicit target type
    auto adenine = charToBase!Base16('A');
    assert(adenine.complement == Base16('T'));
}