/*
    This file is part of BioD.
    Copyright (C) 2012    Artem Tarasov <lomereiter@gmail.com>

    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*/
module bio.core.sequence;

import bio.core.base;

import std.algorithm;
import std.range;
import std.conv;
import std.traits;
public import std.array;

/// Identity function: returns its argument unchanged.
/// Used as the default $(D reverseTransform) of $(D ReversableRange).
T identity(T)(auto ref T t) { return t; }

/// Range that allows to unify operations in forward and reverse directions
/// without the virtual function call overhead introduced by
/// $(D inputRangeObject).
///
/// The direction is a runtime flag, so forward and reversed views over the
/// same source type $(D R) share one static type and can be assigned to
/// each other.
///
/// $(D reverseTransform) is a function that is applied to elements
/// only when the range is iterated backwards.
struct ReversableRange(alias reverseTransform=identity, R)
    if(isBidirectionalRange!R)
{
    private
    {
        // Both fields are declared with `= void`, i.e. without default
        // initialization; the constructor assigns both of them.
        bool _rev = void;
        R _range = void;
    }

    /// Construct reversable range.
    ///
    /// Params:
    ///     range   = bidirectional range
    ///     reverse = if true, all operations on the range will be as if
    ///               $(D retro(range)) was used instead of $(D range).
    this(R range, bool reverse=false)
    {
        _rev = reverse;
        _range = range;
    }

    /// Bidirectional range primitives
    bool empty() @property
    {
        return _range.empty;
    }

    /// ditto
    auto front() @property
    {
        // In reversed mode the logical front is the source's back.
        return _rev ? reverseTransform(_range.back) : _range.front;
    }

    /// ditto
    auto back() @property
    {
        // In reversed mode the logical back is the source's front.
        return _rev ? reverseTransform(_range.front) : _range.back;
    }

    /// ditto
    void popFront()
    {
        if (_rev)
            _range.popBack();
        else
            _range.popFront();
    }

    /// ditto
    void popBack()
    {
        if (_rev)
            _range.popFront();
        else
            _range.popBack();
    }

    /// ditto
    auto save() @property
    {
        return ReversableRange(_range.save, _rev);
    }

    /// Reverse of this range: a saved copy of the source with the
    /// direction flag flipped.
    ReversableRange reverse() @property {
        return ReversableRange(_range.save, !_rev);
    }

    static if(hasLength!R)
    {
        /// If source range has length, the result also has length
        size_t length() @property
        {
            return _range.length;
        }

        /// Makes $(D $) usable inside index and slice expressions
        /// (e.g. $(D r[$ - 1]), $(D r[1 .. $])); it evaluates to $(D length).
        alias length opDollar;
    }

    static if(isRandomAccessRange!R)
    {
        /// If source range is a random access range, $(D opIndex) is defined.
        ///
        /// In reversed mode index $(D i) refers to source element
        /// $(D length - 1 - i), passed through $(D reverseTransform).
        auto opIndex(size_t index)
        {
            if (_rev)
                return reverseTransform(_range[_range.length - 1 - index]);
            else
                return _range[index];
        }
    }

    static if(hasSlicing!R)
    {
        /// Slicing is also propagated. The result keeps the direction
        /// of this range.
        auto opSlice(size_t from, size_t to)
        {
            if (_rev)
            {
                auto len = _range.length;
                //
                // Logical [from, to) of the reversed view corresponds to
                // source indices (len - 1 - to, len - 1 - from],
                // i.e. the half-open source slice [len - to, len - from).
                //
                return ReversableRange(_range[len - to .. len - from], true);
            }
            else
                return ReversableRange(_range[from .. to], false);
        }
    }
}

/// Create reversable range from bidirectional one.
///
/// Convenience factory that infers the range type $(D R) from its argument.
///
/// Params:
///     range   = bidirectional range to wrap
///     reverse = direction flag forwarded to the $(D ReversableRange)
///               constructor
ReversableRange!(reverseTransform, R)
reversableRange(alias reverseTransform=identity, R)(R range, bool reverse=false)
{
    alias ReversableRange!(reverseTransform, R) Wrapped;
    return Wrapped(range, reverse);
}

unittest {
    import std.stdio;
    writeln("BioD: running unittests");

    auto numbers = [1, 2, 3, 4, 5];

    // Start with a backward view over the array.
    auto view = reversableRange(numbers[], true);
    assert(view.front == 5);
    assert(view[2] == 3);

    view.popFront();
    assert(view.back == 1);
    assert(view.front == 4);
    assert(equal(view[1 .. 3], [3, 2]));

    // Here. That's the whole point: the same variable is now
    // re-bound to a forward view.
    // One can't do the same with $(D retro)
    // without using $(D inputRangeObject),
    // but that kills performance because
    // virtual calls can not be inlined.
    view = reversableRange(numbers[], false);
    assert(view.front == 1);
    assert(equal(view[1 .. 3], [2, 3]));
}


/// Sequence of bases. Elements of a reversed sequence are passed
/// through $(D complementBase).
template Sequence(R)
{
    alias ReversableRange!(complementBase, R) Sequence;
}

/// Returns an object very similar to string, but sliceable.
/// Tricks std.traits.isNarrowString: the string is reinterpreted as
/// a $(D ubyte[]) and every byte is mapped back to $(D char).
auto sliceableString(string s) {
    auto bytes = cast(ubyte[])s;
    return map!"cast(char)a"(bytes);
}

/// Type returned by $(D sliceableString).
alias ReturnType!sliceableString SliceableString;

/// Create nucleotide sequence from bidirectional base range.
///
/// Dispatch on the argument type:
/// $(UL
///   $(LI narrow strings are first wrapped with $(D sliceableString);)
///   $(LI ranges of $(D char) or $(D dchar) are mapped through
///        $(D charToBase!Base16);)
///   $(LI any other range is wrapped directly in $(D Sequence!R).)
/// )
///
/// Params:
///     bases   = bidirectional range of bases or characters
///     reverse = direction flag passed on to the resulting sequence
auto nucleotideSequence(R)(R bases, bool reverse=false)
    if(isBidirectionalRange!R)
{
    alias Unqual!(ElementType!R) Elem;

    static if(isNarrowString!R)
    {
        // Narrow strings are routed through the sliceable wrapper first.
        auto sliceable = sliceableString(bases);
        return nucleotideSequence(sliceable, reverse);
    }
    else static if(is(Elem == char) || is(Elem == dchar))
    {
        // Character ranges are converted element-wise before wrapping.
        auto decoded = map!(charToBase!Base16)(bases);
        return nucleotideSequence(decoded, reverse);
    }
    else
    {
        return Sequence!R(bases, reverse);
    }
}

/// Type produced by $(D nucleotideSequence) for a $(D SliceableString).
alias ReturnType!(nucleotideSequence!SliceableString) NucleotideSequence;

unittest {
    auto original = nucleotideSequence("ACGTACGT");

    // reverse-complement
    assert(equal(original.reverse[2 .. 6], "GTAC"));

    // Wrapping an existing sequence in reversed mode.
    auto derived = nucleotideSequence(original, true);
    assert(equal(derived[1 .. 5], "CGTA"));
    assert(equal(derived, map!complementBase(retro(original))));

    // The same variable can hold the forward wrapping as well.
    derived = nucleotideSequence(original, false);
    assert(equal(derived, original));
}