1 module bio.std.hts.sam.utils.recordparser; 2 3 #line 1 "sam_alignment.rl" 4 /* 5 This file is part of BioD. 6 Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com> 7 8 Permission is hereby granted, free of charge, to any person obtaining a 9 copy of this software and associated documentation files (the "Software"), 10 to deal in the Software without restriction, including without limitation 11 the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 and/or sell copies of the Software, and to permit persons to whom the 13 Software is furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 DEALINGS IN THE SOFTWARE. 
25 26 */ 27 28 #line 28 "sam_alignment.d" 29 static byte[] _sam_alignment_actions = [ 30 0, 1, 0, 1, 2, 1, 4, 1, 31 6, 1, 7, 1, 8, 1, 9, 1, 32 10, 1, 11, 1, 12, 1, 13, 1, 33 14, 1, 15, 1, 16, 1, 17, 1, 34 18, 1, 19, 1, 21, 1, 22, 1, 35 23, 1, 27, 1, 28, 1, 29, 1, 36 30, 1, 31, 1, 32, 1, 33, 1, 37 34, 1, 35, 1, 36, 1, 37, 1, 38 39, 1, 40, 1, 41, 1, 42, 1, 39 43, 1, 44, 1, 45, 1, 46, 1, 40 48, 1, 49, 1, 51, 1, 53, 1, 41 57, 1, 60, 1, 61, 1, 62, 1, 42 63, 1, 64, 2, 1, 2, 2, 3, 43 38, 2, 3, 58, 2, 5, 59, 2, 44 6, 7, 2, 20, 23, 2, 24, 25, 45 2, 26, 29, 2, 47, 50, 2, 55, 46 64, 2, 56, 64, 3, 3, 52, 64, 47 3, 3, 58, 64, 3, 5, 54, 64, 48 3, 5, 59, 64, 3, 26, 1, 2 49 50 ]; 51 52 static short[] _sam_alignment_key_offsets = [ 53 0, 0, 5, 7, 10, 15, 18, 20, 54 23, 25, 28, 31, 32, 36, 39, 41, 55 44, 48, 50, 53, 60, 61, 63, 67, 56 73, 74, 80, 81, 83, 84, 91, 92, 57 96, 98, 99, 106, 110, 112, 116, 118, 58 119, 120, 121, 122, 123, 129, 130, 132, 59 133, 140, 144, 146, 150, 152, 153, 154, 60 155, 156, 157, 161, 163, 170, 173, 176, 61 179, 182, 185, 188, 191, 194, 197, 200, 62 203, 206, 209, 212, 215, 218, 219, 222, 63 225, 228, 231, 234, 237, 240, 243, 246, 64 249, 252, 255, 258, 261, 264, 267, 268, 65 269, 270, 281, 292, 303, 314, 325, 336, 66 347, 358, 369, 380, 391, 402, 413, 424, 67 435, 446, 457, 466, 469, 472, 475, 478, 68 481, 484, 487, 490, 493, 496, 499, 502, 69 505, 508, 511, 514, 517, 518, 521, 524, 70 527, 530, 533, 536, 539, 542, 545, 548, 71 551, 554, 557, 560, 563, 566, 567, 568, 72 571, 574, 577, 580, 583, 586, 589, 592, 73 595, 598, 601, 604, 607, 610, 613, 616, 74 617, 622, 623, 624, 625, 626, 627, 628, 75 629, 630, 631, 632, 633, 634, 635, 636, 76 637, 638, 639, 640, 641, 642, 643, 644, 77 647, 648, 652, 656, 660, 664, 668, 672, 78 676, 680, 684, 688, 692, 696, 700, 704, 79 708, 712, 716, 718, 724, 728, 735, 737, 80 744, 747, 752, 755, 761, 762, 765, 768, 81 771, 774, 777, 780, 783, 786, 789, 792, 82 795, 798, 801, 804, 807, 810, 813, 814, 83 814, 814, 814, 814, 
814, 814, 814, 814, 84 814, 814, 814, 814 85 ]; 86 87 static char[] _sam_alignment_trans_keys = [ 88 9u, 33u, 63u, 65u, 126u, 48u, 57u, 9u, 89 48u, 57u, 42u, 33u, 60u, 62u, 126u, 9u, 90 33u, 126u, 48u, 57u, 9u, 48u, 57u, 48u, 91 57u, 9u, 48u, 57u, 42u, 48u, 57u, 9u, 92 42u, 61u, 33u, 126u, 9u, 33u, 126u, 48u, 93 57u, 9u, 48u, 57u, 43u, 45u, 48u, 57u, 94 48u, 57u, 9u, 48u, 57u, 42u, 46u, 61u, 95 65u, 90u, 97u, 122u, 9u, 33u, 126u, 65u, 96 90u, 97u, 122u, 48u, 57u, 65u, 90u, 97u, 97 122u, 58u, 65u, 66u, 72u, 90u, 102u, 105u, 98 58u, 33u, 126u, 58u, 67u, 73u, 83u, 99u, 99 102u, 105u, 115u, 44u, 43u, 45u, 48u, 57u, 100 48u, 57u, 44u, 43u, 45u, 46u, 105u, 110u, 101 48u, 57u, 46u, 105u, 48u, 57u, 48u, 57u, 102 43u, 45u, 48u, 57u, 48u, 57u, 110u, 102u, 103 97u, 110u, 58u, 48u, 57u, 65u, 70u, 97u, 104 102u, 58u, 32u, 126u, 58u, 43u, 45u, 46u, 105 105u, 110u, 48u, 57u, 46u, 105u, 48u, 57u, 106 48u, 57u, 43u, 45u, 48u, 57u, 48u, 57u, 107 110u, 102u, 97u, 110u, 58u, 43u, 45u, 48u, 108 57u, 48u, 57u, 9u, 46u, 61u, 65u, 90u, 109 97u, 122u, 9u, 48u, 57u, 9u, 48u, 57u, 110 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 111 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 112 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 113 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 114 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 115 48u, 57u, 9u, 9u, 48u, 57u, 9u, 48u, 116 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 117 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 118 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 119 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 120 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 121 9u, 48u, 57u, 9u, 9u, 9u, 61u, 68u, 122 80u, 83u, 88u, 48u, 57u, 72u, 73u, 77u, 123 78u, 61u, 68u, 80u, 83u, 88u, 48u, 57u, 124 72u, 73u, 77u, 78u, 61u, 68u, 80u, 83u, 125 88u, 48u, 57u, 72u, 73u, 77u, 78u, 61u, 126 68u, 80u, 83u, 88u, 48u, 57u, 72u, 73u, 127 77u, 78u, 61u, 68u, 80u, 83u, 88u, 48u, 128 57u, 72u, 73u, 77u, 78u, 61u, 68u, 80u, 129 83u, 88u, 48u, 57u, 72u, 73u, 77u, 78u, 130 61u, 68u, 80u, 83u, 88u, 48u, 57u, 72u, 131 73u, 77u, 78u, 61u, 68u, 80u, 83u, 
88u, 132 48u, 57u, 72u, 73u, 77u, 78u, 61u, 68u, 133 80u, 83u, 88u, 48u, 57u, 72u, 73u, 77u, 134 78u, 61u, 68u, 80u, 83u, 88u, 48u, 57u, 135 72u, 73u, 77u, 78u, 61u, 68u, 80u, 83u, 136 88u, 48u, 57u, 72u, 73u, 77u, 78u, 61u, 137 68u, 80u, 83u, 88u, 48u, 57u, 72u, 73u, 138 77u, 78u, 61u, 68u, 80u, 83u, 88u, 48u, 139 57u, 72u, 73u, 77u, 78u, 61u, 68u, 80u, 140 83u, 88u, 48u, 57u, 72u, 73u, 77u, 78u, 141 61u, 68u, 80u, 83u, 88u, 48u, 57u, 72u, 142 73u, 77u, 78u, 61u, 68u, 80u, 83u, 88u, 143 48u, 57u, 72u, 73u, 77u, 78u, 61u, 68u, 144 80u, 83u, 88u, 48u, 57u, 72u, 73u, 77u, 145 78u, 61u, 68u, 80u, 83u, 88u, 72u, 73u, 146 77u, 78u, 9u, 48u, 57u, 9u, 48u, 57u, 147 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 148 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 149 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 150 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 151 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 152 48u, 57u, 9u, 48u, 57u, 9u, 9u, 48u, 153 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 154 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 155 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 156 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 157 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 158 9u, 48u, 57u, 9u, 48u, 57u, 9u, 9u, 159 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 160 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 161 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 162 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 163 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 164 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 165 9u, 9u, 33u, 63u, 65u, 126u, 9u, 9u, 166 9u, 9u, 9u, 9u, 9u, 9u, 9u, 9u, 167 9u, 9u, 9u, 9u, 9u, 9u, 9u, 9u, 168 9u, 9u, 9u, 9u, 9u, 33u, 126u, 9u, 169 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 170 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 171 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 172 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 173 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 174 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 175 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 176 9u, 44u, 48u, 57u, 9u, 44u, 48u, 57u, 177 9u, 44u, 48u, 57u, 9u, 44u, 9u, 44u, 178 69u, 101u, 48u, 57u, 9u, 44u, 48u, 57u, 179 9u, 44u, 46u, 
69u, 101u, 48u, 57u, 9u, 180 44u, 9u, 48u, 57u, 65u, 70u, 97u, 102u, 181 9u, 32u, 126u, 9u, 69u, 101u, 48u, 57u, 182 9u, 48u, 57u, 9u, 46u, 69u, 101u, 48u, 183 57u, 9u, 9u, 48u, 57u, 9u, 48u, 57u, 184 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 185 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 186 48u, 57u, 9u, 48u, 57u, 9u, 48u, 57u, 187 9u, 48u, 57u, 9u, 48u, 57u, 9u, 48u, 188 57u, 9u, 48u, 57u, 9u, 48u, 57u, 9u, 189 48u, 57u, 9u, 48u, 57u, 9u, 9u, 0 190 ]; 191 192 static byte[] _sam_alignment_single_lengths = [ 193 0, 1, 0, 1, 1, 1, 0, 1, 194 0, 1, 1, 1, 2, 1, 0, 1, 195 2, 0, 1, 3, 1, 0, 0, 0, 196 1, 6, 1, 0, 1, 7, 1, 2, 197 0, 1, 5, 2, 0, 2, 0, 1, 198 1, 1, 1, 1, 0, 1, 0, 1, 199 5, 2, 0, 2, 0, 1, 1, 1, 200 1, 1, 2, 0, 3, 1, 1, 1, 201 1, 1, 1, 1, 1, 1, 1, 1, 202 1, 1, 1, 1, 1, 1, 1, 1, 203 1, 1, 1, 1, 1, 1, 1, 1, 204 1, 1, 1, 1, 1, 1, 1, 1, 205 1, 5, 5, 5, 5, 5, 5, 5, 206 5, 5, 5, 5, 5, 5, 5, 5, 207 5, 5, 5, 1, 1, 1, 1, 1, 208 1, 1, 1, 1, 1, 1, 1, 1, 209 1, 1, 1, 1, 1, 1, 1, 1, 210 1, 1, 1, 1, 1, 1, 1, 1, 211 1, 1, 1, 1, 1, 1, 1, 1, 212 1, 1, 1, 1, 1, 1, 1, 1, 213 1, 1, 1, 1, 1, 1, 1, 1, 214 1, 1, 1, 1, 1, 1, 1, 1, 215 1, 1, 1, 1, 1, 1, 1, 1, 216 1, 1, 1, 1, 1, 1, 1, 1, 217 1, 2, 2, 2, 2, 2, 2, 2, 218 2, 2, 2, 2, 2, 2, 2, 2, 219 2, 2, 2, 4, 2, 5, 2, 1, 220 1, 3, 1, 4, 1, 1, 1, 1, 221 1, 1, 1, 1, 1, 1, 1, 1, 222 1, 1, 1, 1, 1, 1, 1, 0, 223 0, 0, 0, 0, 0, 0, 0, 0, 224 0, 0, 0, 1 225 ]; 226 227 static byte[] _sam_alignment_range_lengths = [ 228 0, 2, 1, 1, 2, 1, 1, 1, 229 1, 1, 1, 0, 1, 1, 1, 1, 230 1, 1, 1, 2, 0, 1, 2, 3, 231 0, 0, 0, 1, 0, 0, 0, 1, 232 1, 0, 1, 1, 1, 1, 1, 0, 233 0, 0, 0, 0, 3, 0, 1, 0, 234 1, 1, 1, 1, 1, 0, 0, 0, 235 0, 0, 1, 1, 2, 1, 1, 1, 236 1, 1, 1, 1, 1, 1, 1, 1, 237 1, 1, 1, 1, 1, 0, 1, 1, 238 1, 1, 1, 1, 1, 1, 1, 1, 239 1, 1, 1, 1, 1, 1, 0, 0, 240 0, 3, 3, 3, 3, 3, 3, 3, 241 3, 3, 3, 3, 3, 3, 3, 3, 242 3, 3, 2, 1, 1, 1, 1, 1, 243 1, 1, 1, 1, 1, 1, 1, 1, 244 1, 1, 1, 1, 0, 1, 1, 1, 245 1, 1, 1, 1, 1, 1, 1, 1, 246 1, 1, 1, 1, 1, 0, 0, 1, 247 1, 1, 1, 1, 1, 
1, 1, 1, 248 1, 1, 1, 1, 1, 1, 1, 0, 249 2, 0, 0, 0, 0, 0, 0, 0, 250 0, 0, 0, 0, 0, 0, 0, 0, 251 0, 0, 0, 0, 0, 0, 0, 1, 252 0, 1, 1, 1, 1, 1, 1, 1, 253 1, 1, 1, 1, 1, 1, 1, 1, 254 1, 1, 0, 1, 1, 1, 0, 3, 255 1, 1, 1, 1, 0, 1, 1, 1, 256 1, 1, 1, 1, 1, 1, 1, 1, 257 1, 1, 1, 1, 1, 1, 0, 0, 258 0, 0, 0, 0, 0, 0, 0, 0, 259 0, 0, 0, 0 260 ]; 261 262 static short[] _sam_alignment_index_offsets = [ 263 0, 0, 4, 6, 9, 13, 16, 18, 264 21, 23, 26, 29, 31, 35, 38, 40, 265 43, 47, 49, 52, 58, 60, 62, 65, 266 69, 71, 78, 80, 82, 84, 92, 94, 267 98, 100, 102, 109, 113, 115, 119, 121, 268 123, 125, 127, 129, 131, 135, 137, 139, 269 141, 148, 152, 154, 158, 160, 162, 164, 270 166, 168, 170, 174, 176, 182, 185, 188, 271 191, 194, 197, 200, 203, 206, 209, 212, 272 215, 218, 221, 224, 227, 230, 232, 235, 273 238, 241, 244, 247, 250, 253, 256, 259, 274 262, 265, 268, 271, 274, 277, 280, 282, 275 284, 286, 295, 304, 313, 322, 331, 340, 276 349, 358, 367, 376, 385, 394, 403, 412, 277 421, 430, 439, 447, 450, 453, 456, 459, 278 462, 465, 468, 471, 474, 477, 480, 483, 279 486, 489, 492, 495, 498, 500, 503, 506, 280 509, 512, 515, 518, 521, 524, 527, 530, 281 533, 536, 539, 542, 545, 548, 550, 552, 282 555, 558, 561, 564, 567, 570, 573, 576, 283 579, 582, 585, 588, 591, 594, 597, 600, 284 602, 606, 608, 610, 612, 614, 616, 618, 285 620, 622, 624, 626, 628, 630, 632, 634, 286 636, 638, 640, 642, 644, 646, 648, 650, 287 653, 655, 659, 663, 667, 671, 675, 679, 288 683, 687, 691, 695, 699, 703, 707, 711, 289 715, 719, 723, 726, 732, 736, 743, 746, 290 751, 754, 759, 762, 768, 770, 773, 776, 291 779, 782, 785, 788, 791, 794, 797, 800, 292 803, 806, 809, 812, 815, 818, 821, 823, 293 824, 825, 826, 827, 828, 829, 830, 831, 294 832, 833, 834, 835 295 ]; 296 297 static ubyte[] _sam_alignment_trans_targs = [ 298 2, 168, 168, 0, 3, 0, 4, 151, 299 0, 150, 5, 5, 0, 6, 5, 0, 300 7, 0, 8, 133, 0, 9, 0, 10, 301 116, 0, 11, 97, 0, 12, 0, 95, 302 96, 13, 0, 14, 13, 0, 15, 0, 303 16, 78, 0, 17, 17, 18, 0, 
18, 304 0, 19, 61, 0, 20, 60, 60, 60, 305 60, 0, 21, 0, 191, 0, 23, 23, 306 0, 24, 24, 24, 0, 25, 0, 26, 307 28, 43, 45, 47, 57, 0, 27, 0, 308 192, 0, 29, 0, 30, 30, 30, 30, 309 33, 30, 30, 0, 31, 0, 32, 32, 310 193, 0, 193, 0, 34, 0, 35, 35, 311 36, 39, 41, 213, 0, 36, 39, 213, 312 0, 211, 0, 38, 38, 212, 0, 212, 313 0, 40, 0, 214, 0, 42, 0, 214, 314 0, 44, 0, 215, 215, 215, 0, 46, 315 0, 216, 0, 48, 0, 49, 49, 50, 316 53, 55, 219, 0, 50, 53, 219, 0, 317 217, 0, 52, 52, 218, 0, 218, 0, 318 54, 0, 220, 0, 56, 0, 220, 0, 319 58, 0, 59, 59, 221, 0, 221, 0, 320 21, 60, 60, 60, 60, 0, 19, 62, 321 0, 19, 63, 0, 19, 64, 0, 19, 322 65, 0, 19, 66, 0, 19, 67, 0, 323 19, 68, 0, 19, 69, 0, 19, 70, 324 0, 19, 71, 0, 19, 72, 0, 19, 325 73, 0, 19, 74, 0, 19, 75, 0, 326 19, 76, 0, 19, 77, 0, 19, 0, 327 16, 79, 0, 16, 80, 0, 16, 81, 328 0, 16, 82, 0, 16, 83, 0, 16, 329 84, 0, 16, 85, 0, 16, 86, 0, 330 16, 87, 0, 16, 88, 0, 16, 89, 331 0, 16, 90, 0, 16, 91, 0, 16, 332 92, 0, 16, 93, 0, 16, 94, 0, 333 16, 0, 14, 0, 14, 0, 115, 115, 334 115, 115, 115, 98, 115, 115, 0, 115, 335 115, 115, 115, 115, 99, 115, 115, 0, 336 115, 115, 115, 115, 115, 100, 115, 115, 337 0, 115, 115, 115, 115, 115, 101, 115, 338 115, 0, 115, 115, 115, 115, 115, 102, 339 115, 115, 0, 115, 115, 115, 115, 115, 340 103, 115, 115, 0, 115, 115, 115, 115, 341 115, 104, 115, 115, 0, 115, 115, 115, 342 115, 115, 105, 115, 115, 0, 115, 115, 343 115, 115, 115, 106, 115, 115, 0, 115, 344 115, 115, 115, 115, 107, 115, 115, 0, 345 115, 115, 115, 115, 115, 108, 115, 115, 346 0, 115, 115, 115, 115, 115, 109, 115, 347 115, 0, 115, 115, 115, 115, 115, 110, 348 115, 115, 0, 115, 115, 115, 115, 115, 349 111, 115, 115, 0, 115, 115, 115, 115, 350 115, 112, 115, 115, 0, 115, 115, 115, 351 115, 115, 113, 115, 115, 0, 115, 115, 352 115, 115, 115, 114, 115, 115, 0, 115, 353 115, 115, 115, 115, 115, 115, 0, 12, 354 97, 0, 10, 117, 0, 10, 118, 0, 355 10, 119, 0, 10, 120, 0, 10, 121, 356 0, 10, 122, 0, 10, 123, 0, 10, 357 124, 0, 10, 125, 
0, 10, 126, 0, 358 10, 127, 0, 10, 128, 0, 10, 129, 359 0, 10, 130, 0, 10, 131, 0, 10, 360 132, 0, 10, 0, 8, 134, 0, 8, 361 135, 0, 8, 136, 0, 8, 137, 0, 362 8, 138, 0, 8, 139, 0, 8, 140, 363 0, 8, 141, 0, 8, 142, 0, 8, 364 143, 0, 8, 144, 0, 8, 145, 0, 365 8, 146, 0, 8, 147, 0, 8, 148, 366 0, 8, 149, 0, 8, 0, 6, 0, 367 4, 152, 0, 4, 153, 0, 4, 154, 368 0, 4, 155, 0, 4, 156, 0, 4, 369 157, 0, 4, 158, 0, 4, 159, 0, 370 4, 160, 0, 4, 161, 0, 4, 162, 371 0, 4, 163, 0, 4, 164, 0, 4, 372 165, 0, 4, 166, 0, 4, 167, 0, 373 4, 0, 2, 168, 168, 0, 239, 169, 374 240, 170, 241, 171, 242, 172, 243, 173, 375 244, 174, 245, 175, 246, 176, 247, 177, 376 248, 178, 249, 179, 250, 180, 2, 0, 377 4, 0, 6, 0, 8, 0, 10, 0, 378 12, 0, 14, 0, 16, 0, 19, 0, 379 21, 0, 22, 191, 0, 22, 0, 22, 380 31, 194, 0, 22, 31, 195, 0, 22, 381 31, 196, 0, 22, 31, 197, 0, 22, 382 31, 198, 0, 22, 31, 199, 0, 22, 383 31, 200, 0, 22, 31, 201, 0, 22, 384 31, 202, 0, 22, 31, 203, 0, 22, 385 31, 204, 0, 22, 31, 205, 0, 22, 386 31, 206, 0, 22, 31, 207, 0, 22, 387 31, 208, 0, 22, 31, 209, 0, 22, 388 31, 210, 0, 22, 31, 0, 22, 34, 389 37, 37, 211, 0, 22, 34, 212, 0, 390 22, 34, 36, 37, 37, 213, 0, 22, 391 34, 0, 22, 215, 215, 215, 0, 22, 392 216, 0, 22, 51, 51, 217, 0, 22, 393 218, 0, 22, 50, 51, 51, 219, 0, 394 22, 0, 22, 222, 0, 22, 223, 0, 395 22, 224, 0, 22, 225, 0, 22, 226, 396 0, 22, 227, 0, 22, 228, 0, 22, 397 229, 0, 22, 230, 0, 22, 231, 0, 398 22, 232, 0, 22, 233, 0, 22, 234, 399 0, 22, 235, 0, 22, 236, 0, 22, 400 237, 0, 22, 238, 0, 22, 0, 0, 401 0, 0, 0, 0, 0, 0, 0, 0, 402 0, 0, 0, 22, 0, 0 403 ]; 404 405 static ubyte[] _sam_alignment_trans_actions = [ 406 111, 7, 7, 11, 99, 17, 15, 3, 407 17, 0, 21, 21, 25, 23, 0, 25, 408 99, 31, 29, 3, 31, 99, 35, 114, 409 3, 35, 0, 99, 41, 45, 41, 0, 410 0, 49, 53, 51, 0, 53, 99, 59, 411 57, 3, 59, 1, 1, 99, 63, 99, 412 63, 102, 3, 63, 0, 67, 67, 67, 413 67, 71, 75, 71, 77, 79, 89, 89, 414 0, 0, 0, 0, 0, 91, 0, 0, 415 0, 0, 0, 0, 0, 93, 0, 93, 416 83, 93, 0, 93, 87, 
87, 87, 87, 417 87, 87, 87, 93, 0, 93, 1, 1, 418 99, 93, 99, 93, 0, 93, 5, 5, 419 5, 5, 5, 5, 93, 0, 0, 0, 420 93, 0, 93, 0, 0, 0, 93, 0, 421 93, 0, 93, 0, 93, 0, 93, 0, 422 93, 0, 93, 85, 85, 85, 93, 0, 423 93, 85, 93, 0, 93, 5, 5, 5, 424 5, 5, 5, 93, 0, 0, 0, 93, 425 0, 93, 0, 0, 0, 93, 0, 93, 426 0, 93, 0, 93, 0, 93, 0, 93, 427 0, 93, 1, 1, 99, 93, 99, 93, 428 69, 0, 0, 0, 0, 71, 102, 3, 429 63, 102, 3, 63, 102, 3, 63, 102, 430 3, 63, 102, 3, 63, 102, 3, 63, 431 102, 3, 63, 102, 3, 63, 102, 3, 432 63, 102, 3, 63, 102, 3, 63, 102, 433 3, 63, 102, 3, 63, 102, 3, 63, 434 102, 3, 63, 102, 3, 63, 102, 63, 435 57, 3, 59, 57, 3, 59, 57, 3, 436 59, 57, 3, 59, 57, 3, 59, 57, 437 3, 59, 57, 3, 59, 57, 3, 59, 438 57, 3, 59, 57, 3, 59, 57, 3, 439 59, 57, 3, 59, 57, 3, 59, 57, 440 3, 59, 57, 3, 59, 57, 3, 59, 441 57, 59, 0, 53, 47, 53, 117, 117, 442 117, 117, 117, 3, 117, 117, 41, 117, 443 117, 117, 117, 117, 3, 117, 117, 41, 444 117, 117, 117, 117, 117, 3, 117, 117, 445 41, 117, 117, 117, 117, 117, 3, 117, 446 117, 41, 117, 117, 117, 117, 117, 3, 447 117, 117, 41, 117, 117, 117, 117, 117, 448 3, 117, 117, 41, 117, 117, 117, 117, 449 117, 3, 117, 117, 41, 117, 117, 117, 450 117, 117, 3, 117, 117, 41, 117, 117, 451 117, 117, 117, 3, 117, 117, 41, 117, 452 117, 117, 117, 117, 3, 117, 117, 41, 453 117, 117, 117, 117, 117, 3, 117, 117, 454 41, 117, 117, 117, 117, 117, 3, 117, 455 117, 41, 117, 117, 117, 117, 117, 3, 456 117, 117, 41, 117, 117, 117, 117, 117, 457 3, 117, 117, 41, 117, 117, 117, 117, 458 117, 3, 117, 117, 41, 117, 117, 117, 459 117, 117, 3, 117, 117, 41, 117, 117, 460 117, 117, 117, 3, 117, 117, 41, 117, 461 117, 117, 117, 117, 117, 117, 41, 120, 462 148, 41, 114, 3, 35, 114, 3, 35, 463 114, 3, 35, 114, 3, 35, 114, 3, 464 35, 114, 3, 35, 114, 3, 35, 114, 465 3, 35, 114, 3, 35, 114, 3, 35, 466 114, 3, 35, 114, 3, 35, 114, 3, 467 35, 114, 3, 35, 114, 3, 35, 114, 468 3, 35, 114, 35, 29, 3, 31, 29, 469 3, 31, 29, 3, 31, 29, 3, 31, 470 29, 3, 31, 29, 3, 31, 29, 3, 471 
31, 29, 3, 31, 29, 3, 31, 29, 472 3, 31, 29, 3, 31, 29, 3, 31, 473 29, 3, 31, 29, 3, 31, 29, 3, 474 31, 29, 3, 31, 29, 31, 0, 25, 475 15, 3, 17, 15, 3, 17, 15, 3, 476 17, 15, 3, 17, 15, 3, 17, 15, 477 3, 17, 15, 3, 17, 15, 3, 17, 478 15, 3, 17, 15, 3, 17, 15, 3, 479 17, 15, 3, 17, 15, 3, 17, 15, 480 3, 17, 15, 3, 17, 15, 3, 17, 481 15, 17, 9, 0, 0, 11, 13, 0, 482 19, 0, 27, 0, 33, 0, 37, 0, 483 43, 0, 55, 0, 61, 0, 65, 0, 484 73, 0, 81, 0, 95, 0, 0, 0, 485 0, 0, 0, 0, 0, 0, 39, 0, 486 45, 0, 0, 0, 0, 0, 0, 0, 487 0, 0, 123, 77, 79, 97, 93, 136, 488 105, 3, 93, 136, 105, 3, 93, 136, 489 105, 3, 93, 136, 105, 3, 93, 136, 490 105, 3, 93, 136, 105, 3, 93, 136, 491 105, 3, 93, 136, 105, 3, 93, 136, 492 105, 3, 93, 136, 105, 3, 93, 136, 493 105, 3, 93, 136, 105, 3, 93, 136, 494 105, 3, 93, 136, 105, 3, 93, 136, 495 105, 3, 93, 136, 105, 3, 93, 136, 496 105, 3, 93, 136, 105, 93, 144, 108, 497 0, 0, 0, 93, 144, 108, 0, 93, 498 144, 108, 0, 0, 0, 0, 93, 144, 499 108, 93, 129, 0, 0, 0, 93, 126, 500 0, 93, 140, 0, 0, 0, 93, 140, 501 0, 93, 140, 0, 0, 0, 0, 93, 502 140, 93, 132, 3, 93, 132, 3, 93, 503 132, 3, 93, 132, 3, 93, 132, 3, 504 93, 132, 3, 93, 132, 3, 93, 132, 505 3, 93, 132, 3, 93, 132, 3, 93, 506 132, 3, 93, 132, 3, 93, 132, 3, 507 93, 132, 3, 93, 132, 3, 93, 132, 508 3, 93, 132, 3, 93, 132, 93, 0, 509 0, 0, 0, 0, 0, 0, 0, 0, 510 0, 0, 0, 0, 0, 0 511 ]; 512 513 static ubyte[] _sam_alignment_eof_actions = [ 514 0, 11, 17, 17, 25, 25, 31, 31, 515 35, 35, 41, 41, 53, 53, 59, 59, 516 63, 63, 63, 71, 71, 79, 0, 0, 517 0, 93, 93, 93, 93, 93, 93, 93, 518 93, 93, 93, 93, 93, 93, 93, 93, 519 93, 93, 93, 93, 93, 93, 93, 93, 520 93, 93, 93, 93, 93, 93, 93, 93, 521 93, 93, 93, 93, 71, 63, 63, 63, 522 63, 63, 63, 63, 63, 63, 63, 63, 523 63, 63, 63, 63, 63, 63, 59, 59, 524 59, 59, 59, 59, 59, 59, 59, 59, 525 59, 59, 59, 59, 59, 59, 59, 53, 526 53, 41, 41, 41, 41, 41, 41, 41, 527 41, 41, 41, 41, 41, 41, 41, 41, 528 41, 41, 41, 41, 35, 35, 35, 35, 529 35, 35, 35, 35, 35, 35, 35, 
35, 530 35, 35, 35, 35, 35, 31, 31, 31, 531 31, 31, 31, 31, 31, 31, 31, 31, 532 31, 31, 31, 31, 31, 31, 25, 17, 533 17, 17, 17, 17, 17, 17, 17, 17, 534 17, 17, 17, 17, 17, 17, 17, 17, 535 11, 0, 0, 0, 0, 0, 0, 0, 536 0, 0, 0, 0, 0, 0, 0, 0, 537 0, 0, 0, 0, 0, 0, 0, 123, 538 97, 136, 136, 136, 136, 136, 136, 136, 539 136, 136, 136, 136, 136, 136, 136, 136, 540 136, 136, 136, 144, 144, 144, 144, 129, 541 126, 140, 140, 140, 140, 132, 132, 132, 542 132, 132, 132, 132, 132, 132, 132, 132, 543 132, 132, 132, 132, 132, 132, 132, 0, 544 0, 0, 0, 0, 0, 0, 0, 0, 545 0, 0, 0, 0 546 ]; 547 548 static int sam_alignment_start = 1; 549 static int sam_alignment_first_final = 191; 550 static int sam_alignment_error = 0; 551 552 static int sam_alignment_en_recover_from_invalid_qname = 169; 553 static int sam_alignment_en_recover_from_invalid_flag = 170; 554 static int sam_alignment_en_recover_from_invalid_rname = 171; 555 static int sam_alignment_en_recover_from_invalid_pos = 172; 556 static int sam_alignment_en_recover_from_invalid_mapq = 173; 557 static int sam_alignment_en_recover_from_invalid_cigar = 174; 558 static int sam_alignment_en_recover_from_invalid_rnext = 175; 559 static int sam_alignment_en_recover_from_invalid_pnext = 176; 560 static int sam_alignment_en_recover_from_invalid_tlen = 177; 561 static int sam_alignment_en_recover_from_invalid_seq = 178; 562 static int sam_alignment_en_recover_from_invalid_qual = 179; 563 static int sam_alignment_en_recover_from_invalid_tag = 180; 564 static int sam_alignment_en_alignment = 1; 565 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_flag_parsing = 181; 566 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_rname_parsing = 182; 567 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_pos_parsing = 183; 568 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_mapq_parsing = 184; 569 static int 
sam_alignment_en_alignment_field_parsing_mandatoryfields_cigar_parsing = 185; 570 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_rnext_parsing = 186; 571 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_pnext_parsing = 187; 572 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_tlen_parsing = 188; 573 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_seq_parsing = 189; 574 static int sam_alignment_en_alignment_field_parsing_mandatoryfields_qual_parsing = 190; 575 static int sam_alignment_en_alignment_tag_parsing = 251; 576 577 578 #line 419 "sam_alignment.rl" 579 580 581 import bio.std.hts.sam.header; 582 import bio.std.hts.bam.cigar; 583 import bio.std.hts.bam.read; 584 import bio.std.hts.bam.bai.bin; 585 import bio.core.utils.outbuffer; 586 import bio.core.base; 587 import std.conv; 588 import std.array; 589 import std.exception; 590 591 BamRead parseAlignmentLine(string line, SamHeader header, OutBuffer buffer=null) { 592 char* p = cast(char*)line.ptr; 593 char* pe = p + line.length; 594 char* eof = pe; 595 int cs; 596 597 if (buffer is null) 598 buffer = new OutBuffer(8192); 599 else 600 buffer.clear(); 601 602 size_t rollback_size; // needed in case of invalid data 603 604 byte current_sign = 1; 605 606 size_t read_name_beg; // position of beginning of QNAME 607 608 size_t sequence_beg; // position of SEQ start 609 int l_seq; // sequence length 610 611 uint cigar_op_len; // length of CIGAR operation 612 char cigar_op_chr; // CIGAR operation 613 614 size_t quals_length; // number of QUAL characters 615 char quals_last_char; // needed in order to handle '*' correctly 616 617 size_t cigar_op_len_start; // position of start of CIGAR operation 618 619 long int_value; // for storing temporary integers 620 float float_value; // for storing temporary floats 621 size_t float_beg; // position of start of current float 622 char arraytype; // type of last array tag value 623 size_t 
tag_array_length_offset; // where the length is stored in the buffer 624 625 string read_name; 626 ushort flag; 627 int pos = -1; 628 int end_pos; // for bin calculation 629 int mate_pos = -1; 630 ubyte mapping_quality = 255; 631 int template_length = 0; 632 633 size_t tag_key_beg, tagvalue_beg; 634 ubyte[] tag_key; 635 size_t rname_beg, rnext_beg; 636 637 int ref_id = -1; 638 639 640 #line 640 "sam_alignment.d" 641 { 642 cs = sam_alignment_start; 643 } 644 645 #line 480 "sam_alignment.rl" 646 647 #line 647 "sam_alignment.d" 648 { 649 int _klen; 650 uint _trans; 651 byte* _acts; 652 uint _nacts; 653 char* _keys; 654 655 if ( p == pe ) 656 goto _test_eof; 657 if ( cs == 0 ) 658 goto _out; 659 _resume: 660 _keys = &_sam_alignment_trans_keys[_sam_alignment_key_offsets[cs]]; 661 _trans = _sam_alignment_index_offsets[cs]; 662 663 _klen = _sam_alignment_single_lengths[cs]; 664 if ( _klen > 0 ) { 665 char* _lower = _keys; 666 char* _mid; 667 char* _upper = _keys + _klen - 1; 668 while (1) { 669 if ( _upper < _lower ) 670 break; 671 672 _mid = _lower + ((_upper-_lower) >> 1); 673 if ( (*p) < *_mid ) 674 _upper = _mid - 1; 675 else if ( (*p) > *_mid ) 676 _lower = _mid + 1; 677 else { 678 _trans += cast(uint)(_mid - _keys); 679 goto _match; 680 } 681 } 682 _keys += _klen; 683 _trans += _klen; 684 } 685 686 _klen = _sam_alignment_range_lengths[cs]; 687 if ( _klen > 0 ) { 688 char* _lower = _keys; 689 char* _mid; 690 char* _upper = _keys + (_klen<<1) - 2; 691 while (1) { 692 if ( _upper < _lower ) 693 break; 694 695 _mid = _lower + (((_upper-_lower) >> 1) & ~1); 696 if ( (*p) < _mid[0] ) 697 _upper = _mid - 2; 698 else if ( (*p) > _mid[1] ) 699 _lower = _mid + 2; 700 else { 701 _trans += cast(uint)((_mid - _keys)>>1); 702 goto _match; 703 } 704 } 705 _trans += _klen; 706 } 707 708 _match: 709 cs = _sam_alignment_trans_targs[_trans]; 710 711 if ( _sam_alignment_trans_actions[_trans] == 0 ) 712 goto _again; 713 714 _acts = 
&_sam_alignment_actions[_sam_alignment_trans_actions[_trans]]; 715 _nacts = cast(uint) *_acts++; 716 while ( _nacts-- > 0 ) 717 { 718 switch ( *_acts++ ) 719 { 720 case 0: 721 #line 27 "sam_alignment.rl" 722 { current_sign = (*p) == '-' ? -1 : 1; } 723 break; 724 case 1: 725 #line 28 "sam_alignment.rl" 726 { int_value = 0; } 727 break; 728 case 2: 729 #line 29 "sam_alignment.rl" 730 { int_value *= 10; int_value += (*p) - '0'; } 731 break; 732 case 3: 733 #line 30 "sam_alignment.rl" 734 { int_value *= current_sign; current_sign = 1; } 735 break; 736 case 4: 737 #line 37 "sam_alignment.rl" 738 { float_beg = p - line.ptr; } 739 break; 740 case 5: 741 #line 38 "sam_alignment.rl" 742 { 743 float_value = to!float(line[float_beg .. p - line.ptr]); 744 } 745 break; 746 case 6: 747 #line 48 "sam_alignment.rl" 748 { read_name_beg = p - line.ptr; } 749 break; 750 case 7: 751 #line 49 "sam_alignment.rl" 752 { read_name = line[read_name_beg .. p - line.ptr]; } 753 break; 754 case 8: 755 #line 50 "sam_alignment.rl" 756 { p--; {cs = 169; if (true) goto _again;} } 757 break; 758 case 9: 759 #line 51 "sam_alignment.rl" 760 { p--; {cs = 181; if (true) goto _again;} } 761 break; 762 case 10: 763 #line 56 "sam_alignment.rl" 764 { flag = to!ushort(int_value); } 765 break; 766 case 11: 767 #line 58 "sam_alignment.rl" 768 { p--; {cs = 170; if (true) goto _again;} } 769 break; 770 case 12: 771 #line 59 "sam_alignment.rl" 772 { p--; {cs = 182; if (true) goto _again;} } 773 break; 774 case 13: 775 #line 62 "sam_alignment.rl" 776 { rname_beg = p - line.ptr; } 777 break; 778 case 14: 779 #line 63 "sam_alignment.rl" 780 { 781 ref_id = header.getSequenceIndex(line[rname_beg .. 
p - line.ptr]); 782 } 783 break; 784 case 15: 785 #line 67 "sam_alignment.rl" 786 { p--; {cs = 171; if (true) goto _again;} } 787 break; 788 case 16: 789 #line 68 "sam_alignment.rl" 790 { p--; {cs = 183; if (true) goto _again;} } 791 break; 792 case 17: 793 #line 73 "sam_alignment.rl" 794 { end_pos = pos = to!uint(int_value); } 795 break; 796 case 18: 797 #line 75 "sam_alignment.rl" 798 { p--; {cs = 172; if (true) goto _again;} } 799 break; 800 case 19: 801 #line 76 "sam_alignment.rl" 802 { p--; {cs = 184; if (true) goto _again;} } 803 break; 804 case 20: 805 #line 79 "sam_alignment.rl" 806 { mapping_quality = to!ubyte(int_value); } 807 break; 808 case 21: 809 #line 81 "sam_alignment.rl" 810 { p--; {cs = 173; if (true) goto _again;} } 811 break; 812 case 22: 813 #line 82 "sam_alignment.rl" 814 { p--; {cs = 185; if (true) goto _again;} } 815 break; 816 case 23: 817 #line 85 "sam_alignment.rl" 818 { 819 buffer.capacity = 32 + read_name.length + 1; 820 buffer.putUnsafe!int(ref_id); 821 buffer.putUnsafe!int(pos - 1); 822 823 enforce(read_name.length + 1 <= 255, "Read name " ~ read_name ~ " is too long!"); 824 825 // bin will be set later 826 auto bin_mq_nl = ((cast(uint)mapping_quality) << 8) | (read_name.length + 1); 827 buffer.putUnsafe(cast(uint)bin_mq_nl); 828 829 // number of CIGAR operations will be set later 830 buffer.putUnsafe!uint(flag << 16); 831 832 buffer.putUnsafe!int(0); 833 buffer.putUnsafe!int(-1); // mate ref. 
id 834 buffer.putUnsafe!int(-1); // mate pos 835 buffer.putUnsafe!int(0); // tlen 836 837 buffer.putUnsafe(cast(ubyte[])read_name); 838 buffer.putUnsafe!ubyte(0); 839 840 rollback_size = buffer.length; 841 } 842 break; 843 case 24: 844 #line 111 "sam_alignment.rl" 845 { cigar_op_len = to!uint(int_value); } 846 break; 847 case 25: 848 #line 112 "sam_alignment.rl" 849 { cigar_op_chr = (*p); } 850 break; 851 case 26: 852 #line 113 "sam_alignment.rl" 853 { 854 auto op = CigarOperation(cigar_op_len, cigar_op_chr); 855 if (op.is_reference_consuming) 856 end_pos += op.length; 857 buffer.put!CigarOperation(op); 858 { 859 auto ptr = cast(uint*)(buffer.data.ptr + 3 * uint.sizeof); 860 *ptr = (*ptr) + 1; 861 } 862 } 863 break; 864 case 27: 865 #line 124 "sam_alignment.rl" 866 { 867 auto ptr = cast(uint*)(buffer.data.ptr + 3 * uint.sizeof); 868 *ptr = (*ptr) & 0xFFFF0000; 869 buffer.shrink(rollback_size); 870 end_pos = pos + 1; 871 p--; {cs = 174; if (true) goto _again;} 872 } 873 break; 874 case 28: 875 #line 131 "sam_alignment.rl" 876 { p--; {cs = 186; if (true) goto _again;} } 877 break; 878 case 29: 879 #line 137 "sam_alignment.rl" 880 { 881 if (end_pos == pos) 882 ++end_pos; 883 { 884 auto bin = reg2bin(pos - 1, end_pos - 1); // 0-based [) interval 885 auto ptr = cast(uint*)(buffer.data.ptr + 2 * uint.sizeof); 886 *ptr = (*ptr) | ((cast(uint)bin) << 16); 887 } 888 } 889 break; 890 case 30: 891 #line 148 "sam_alignment.rl" 892 { 893 { 894 auto ptr = cast(int*)(buffer.data.ptr + 5 * int.sizeof); 895 *ptr = ref_id; 896 } 897 } 898 break; 899 case 31: 900 #line 155 "sam_alignment.rl" 901 { rnext_beg = p - line.ptr; } 902 break; 903 case 32: 904 #line 156 "sam_alignment.rl" 905 { 906 { 907 auto ptr = cast(int*)(buffer.data.ptr + 5 * int.sizeof); 908 *ptr = header.getSequenceIndex(line[rnext_beg .. 
p - line.ptr]); 909 } 910 } 911 break; 912 case 33: 913 #line 162 "sam_alignment.rl" 914 { p--; {cs = 175; if (true) goto _again;} } 915 break; 916 case 34: 917 #line 163 "sam_alignment.rl" 918 { p--; {cs = 187; if (true) goto _again;} } 919 break; 920 case 35: 921 #line 169 "sam_alignment.rl" 922 { 923 { 924 auto ptr = cast(int*)(buffer.data.ptr + 6 * int.sizeof); 925 *ptr = to!int(int_value) - 1; 926 } 927 } 928 break; 929 case 36: 930 #line 175 "sam_alignment.rl" 931 { p--; {cs = 176; if (true) goto _again;} } 932 break; 933 case 37: 934 #line 176 "sam_alignment.rl" 935 { p--; {cs = 188; if (true) goto _again;} } 936 break; 937 case 38: 938 #line 181 "sam_alignment.rl" 939 { 940 { 941 auto ptr = cast(int*)(buffer.data.ptr + 7 * int.sizeof); 942 *ptr = to!int(int_value); 943 } 944 } 945 break; 946 case 39: 947 #line 187 "sam_alignment.rl" 948 { p--; {cs = 177; if (true) goto _again;} } 949 break; 950 case 40: 951 #line 188 "sam_alignment.rl" 952 { p--; {cs = 189; if (true) goto _again;} } 953 break; 954 case 41: 955 #line 193 "sam_alignment.rl" 956 { sequence_beg = p - line.ptr; } 957 break; 958 case 42: 959 #line 194 "sam_alignment.rl" 960 { 961 auto data = cast(ubyte[])line[sequence_beg .. 
p - line.ptr]; 962 l_seq = cast(int)data.length; 963 auto raw_len = (l_seq + 1) / 2; 964 965 // reserve space for base qualities, too 966 buffer.capacity = buffer.length + raw_len + l_seq; 967 968 for (size_t i = 0; i < raw_len; ++i) { 969 auto b = cast(ubyte)(Base(data[2 * i]).internal_code << 4); 970 if (2 * i + 1 < l_seq) 971 b |= cast(ubyte)(Base(data[2 * i + 1]).internal_code); 972 buffer.putUnsafe!ubyte(b); 973 } 974 975 // set l_seq 976 { 977 auto ptr = cast(int*)(buffer.data.ptr + 4 * int.sizeof); 978 *ptr = l_seq; 979 } 980 981 rollback_size = buffer.length; 982 } 983 break; 984 case 43: 985 #line 217 "sam_alignment.rl" 986 { 987 rollback_size = buffer.length; 988 p--; {cs = 178; if (true) goto _again;} 989 } 990 break; 991 case 44: 992 #line 221 "sam_alignment.rl" 993 { p--; {cs = 190; if (true) goto _again;} } 994 break; 995 case 45: 996 #line 223 "sam_alignment.rl" 997 { 998 rollback_size = buffer.length; 999 } 1000 break; 1001 case 46: 1002 #line 230 "sam_alignment.rl" 1003 { 1004 ++quals_length; 1005 quals_last_char = (*p); 1006 buffer.putUnsafe!ubyte(cast(ubyte)((*p) - 33)); 1007 } 1008 break; 1009 case 47: 1010 #line 236 "sam_alignment.rl" 1011 { 1012 // '*' may correspond either to a one-base long sequence 1013 // or to absence of information 1014 if (quals_length == 1 && quals_last_char == '*' && l_seq == 0) 1015 buffer.shrink(rollback_size); 1016 } 1017 break; 1018 case 48: 1019 #line 243 "sam_alignment.rl" 1020 { 1021 buffer.shrink(rollback_size); 1022 for (size_t i = 0; i < l_seq; ++i) 1023 buffer.putUnsafe!ubyte(0xFF); 1024 rollback_size = buffer.length; 1025 p--; {cs = 179; if (true) goto _again;} 1026 } 1027 break; 1028 case 49: 1029 #line 251 "sam_alignment.rl" 1030 { p--; {cs = 251; if (true) goto _again;} } 1031 break; 1032 case 50: 1033 #line 253 "sam_alignment.rl" 1034 { 1035 if (buffer.length - rollback_size != l_seq) { 1036 buffer.shrink(rollback_size); 1037 for (size_t i = 0; i < l_seq; ++i) 1038 buffer.putUnsafe!ubyte(0xFF); 1039 } 
1040 rollback_size = buffer.length; 1041 } 1042 break; 1043 case 51: 1044 #line 278 "sam_alignment.rl" 1045 { 1046 buffer.capacity = buffer.length + 4; 1047 buffer.putUnsafe(tag_key); 1048 buffer.putUnsafe!char('A'); 1049 buffer.putUnsafe!char((*p)); 1050 } 1051 break; 1052 case 52: 1053 #line 285 "sam_alignment.rl" 1054 { 1055 buffer.capacity = buffer.length + 7; 1056 buffer.putUnsafe(tag_key); 1057 if (int_value < 0) { 1058 if (int_value >= byte.min) { 1059 buffer.putUnsafe!char('c'); 1060 buffer.putUnsafe(cast(byte)int_value); 1061 } else if (int_value >= short.min) { 1062 buffer.putUnsafe!char('s'); 1063 buffer.putUnsafe(cast(short)int_value); 1064 } else if (int_value >= int.min) { 1065 buffer.putUnsafe!char('i'); 1066 buffer.putUnsafe(cast(int)int_value); 1067 } else { 1068 throw new Exception("integer out of range"); 1069 } 1070 } else { 1071 if (int_value <= ubyte.max) { 1072 buffer.putUnsafe!char('C'); 1073 buffer.putUnsafe(cast(ubyte)int_value); 1074 } else if (int_value <= ushort.max) { 1075 buffer.putUnsafe!char('S'); 1076 buffer.putUnsafe(cast(ushort)int_value); 1077 } else if (int_value <= uint.max) { 1078 buffer.putUnsafe!char('I'); 1079 buffer.putUnsafe(cast(uint)int_value); 1080 } else { 1081 throw new Exception("integer out of range"); 1082 } 1083 } 1084 } 1085 break; 1086 case 53: 1087 #line 317 "sam_alignment.rl" 1088 { tagvalue_beg = p - line.ptr; } 1089 break; 1090 case 54: 1091 #line 319 "sam_alignment.rl" 1092 { 1093 buffer.capacity = buffer.length + 7; 1094 buffer.putUnsafe(tag_key); 1095 buffer.putUnsafe!char('f'); 1096 buffer.putUnsafe!float(float_value); 1097 } 1098 break; 1099 case 55: 1100 #line 326 "sam_alignment.rl" 1101 { 1102 { 1103 auto data = cast(ubyte[])(line[tagvalue_beg .. 
p - line.ptr]); 1104 buffer.capacity = buffer.length + 4 + data.length; 1105 buffer.putUnsafe(tag_key); 1106 buffer.putUnsafe!char('Z'); 1107 buffer.putUnsafe(data); 1108 buffer.putUnsafe!ubyte(0); 1109 } 1110 } 1111 break; 1112 case 56: 1113 #line 337 "sam_alignment.rl" 1114 { 1115 { 1116 auto data = cast(ubyte[])(line[tagvalue_beg .. p - line.ptr]); 1117 buffer.capacity = buffer.length + 4 + data.length; 1118 buffer.putUnsafe(tag_key); 1119 buffer.putUnsafe!char('H'); 1120 buffer.putUnsafe(data); 1121 buffer.putUnsafe!ubyte(0); 1122 } 1123 } 1124 break; 1125 case 57: 1126 #line 352 "sam_alignment.rl" 1127 { 1128 arraytype = (*p); 1129 buffer.capacity = buffer.length + 8; 1130 buffer.putUnsafe(tag_key); 1131 buffer.putUnsafe!char('B'); 1132 buffer.putUnsafe!char(arraytype); 1133 buffer.putUnsafe!uint(0); 1134 tag_array_length_offset = buffer.length - uint.sizeof; 1135 } 1136 break; 1137 case 58: 1138 #line 362 "sam_alignment.rl" 1139 { 1140 // here, we assume that compiler is smart enough to move switch out of loop. 1141 switch (arraytype) { 1142 case 'c': buffer.put(to!byte(int_value)); break; 1143 case 'C': buffer.put(to!ubyte(int_value)); break; 1144 case 's': buffer.put(to!short(int_value)); break; 1145 case 'S': buffer.put(to!ushort(int_value)); break; 1146 case 'i': buffer.put(to!int(int_value)); break; 1147 case 'I': buffer.put(to!uint(int_value)); break; 1148 default: assert(0); 1149 } 1150 { 1151 auto ptr = cast(uint*)(buffer.data.ptr + tag_array_length_offset); 1152 ++*ptr; 1153 } 1154 } 1155 break; 1156 case 59: 1157 #line 379 "sam_alignment.rl" 1158 { 1159 buffer.put!float(float_value); 1160 { 1161 auto ptr = cast(uint*)(buffer.data.ptr + tag_array_length_offset); 1162 ++*ptr; 1163 } 1164 } 1165 break; 1166 case 60: 1167 #line 400 "sam_alignment.rl" 1168 { tag_key_beg = p - line.ptr; } 1169 break; 1170 case 61: 1171 #line 401 "sam_alignment.rl" 1172 { tag_key = cast(ubyte[])(line[tag_key_beg .. 
p - line.ptr]); } 1173 break; 1174 case 62: 1175 #line 403 "sam_alignment.rl" 1176 { 1177 buffer.shrink(rollback_size); 1178 p--; {cs = 180; if (true) goto _again;} 1179 } 1180 break; 1181 case 63: 1182 #line 408 "sam_alignment.rl" 1183 { p--; {cs = 251; if (true) goto _again;} } 1184 break; 1185 case 64: 1186 #line 410 "sam_alignment.rl" 1187 { rollback_size = buffer.length; } 1188 break; 1189 #line 1189 "sam_alignment.d" 1190 default: break; 1191 } 1192 } 1193 1194 _again: 1195 if ( cs == 0 ) 1196 goto _out; 1197 if ( ++p != pe ) 1198 goto _resume; 1199 _test_eof: {} 1200 if ( p == eof ) 1201 { 1202 byte* __acts = &_sam_alignment_actions[_sam_alignment_eof_actions[cs]]; 1203 uint __nacts = cast(uint) *__acts++; 1204 while ( __nacts-- > 0 ) { 1205 switch ( *__acts++ ) { 1206 case 3: 1207 #line 30 "sam_alignment.rl" 1208 { int_value *= current_sign; current_sign = 1; } 1209 break; 1210 case 5: 1211 #line 38 "sam_alignment.rl" 1212 { 1213 float_value = to!float(line[float_beg .. p - line.ptr]); 1214 } 1215 break; 1216 case 8: 1217 #line 50 "sam_alignment.rl" 1218 { p--; {cs = 169; if (true) goto _again;} } 1219 break; 1220 case 11: 1221 #line 58 "sam_alignment.rl" 1222 { p--; {cs = 170; if (true) goto _again;} } 1223 break; 1224 case 15: 1225 #line 67 "sam_alignment.rl" 1226 { p--; {cs = 171; if (true) goto _again;} } 1227 break; 1228 case 18: 1229 #line 75 "sam_alignment.rl" 1230 { p--; {cs = 172; if (true) goto _again;} } 1231 break; 1232 case 21: 1233 #line 81 "sam_alignment.rl" 1234 { p--; {cs = 173; if (true) goto _again;} } 1235 break; 1236 case 27: 1237 #line 124 "sam_alignment.rl" 1238 { 1239 auto ptr = cast(uint*)(buffer.data.ptr + 3 * uint.sizeof); 1240 *ptr = (*ptr) & 0xFFFF0000; 1241 buffer.shrink(rollback_size); 1242 end_pos = pos + 1; 1243 p--; {cs = 174; if (true) goto _again;} 1244 } 1245 break; 1246 case 33: 1247 #line 162 "sam_alignment.rl" 1248 { p--; {cs = 175; if (true) goto _again;} } 1249 break; 1250 case 36: 1251 #line 175 "sam_alignment.rl" 
1252 { p--; {cs = 176; if (true) goto _again;} } 1253 break; 1254 case 39: 1255 #line 187 "sam_alignment.rl" 1256 { p--; {cs = 177; if (true) goto _again;} } 1257 break; 1258 case 43: 1259 #line 217 "sam_alignment.rl" 1260 { 1261 rollback_size = buffer.length; 1262 p--; {cs = 178; if (true) goto _again;} 1263 } 1264 break; 1265 case 47: 1266 #line 236 "sam_alignment.rl" 1267 { 1268 // '*' may correspond either to a one-base long sequence 1269 // or to absence of information 1270 if (quals_length == 1 && quals_last_char == '*' && l_seq == 0) 1271 buffer.shrink(rollback_size); 1272 } 1273 break; 1274 case 48: 1275 #line 243 "sam_alignment.rl" 1276 { 1277 buffer.shrink(rollback_size); 1278 for (size_t i = 0; i < l_seq; ++i) 1279 buffer.putUnsafe!ubyte(0xFF); 1280 rollback_size = buffer.length; 1281 p--; {cs = 179; if (true) goto _again;} 1282 } 1283 break; 1284 case 50: 1285 #line 253 "sam_alignment.rl" 1286 { 1287 if (buffer.length - rollback_size != l_seq) { 1288 buffer.shrink(rollback_size); 1289 for (size_t i = 0; i < l_seq; ++i) 1290 buffer.putUnsafe!ubyte(0xFF); 1291 } 1292 rollback_size = buffer.length; 1293 } 1294 break; 1295 case 52: 1296 #line 285 "sam_alignment.rl" 1297 { 1298 buffer.capacity = buffer.length + 7; 1299 buffer.putUnsafe(tag_key); 1300 if (int_value < 0) { 1301 if (int_value >= byte.min) { 1302 buffer.putUnsafe!char('c'); 1303 buffer.putUnsafe(cast(byte)int_value); 1304 } else if (int_value >= short.min) { 1305 buffer.putUnsafe!char('s'); 1306 buffer.putUnsafe(cast(short)int_value); 1307 } else if (int_value >= int.min) { 1308 buffer.putUnsafe!char('i'); 1309 buffer.putUnsafe(cast(int)int_value); 1310 } else { 1311 throw new Exception("integer out of range"); 1312 } 1313 } else { 1314 if (int_value <= ubyte.max) { 1315 buffer.putUnsafe!char('C'); 1316 buffer.putUnsafe(cast(ubyte)int_value); 1317 } else if (int_value <= ushort.max) { 1318 buffer.putUnsafe!char('S'); 1319 buffer.putUnsafe(cast(ushort)int_value); 1320 } else if (int_value <= 
uint.max) { 1321 buffer.putUnsafe!char('I'); 1322 buffer.putUnsafe(cast(uint)int_value); 1323 } else { 1324 throw new Exception("integer out of range"); 1325 } 1326 } 1327 } 1328 break; 1329 case 54: 1330 #line 319 "sam_alignment.rl" 1331 { 1332 buffer.capacity = buffer.length + 7; 1333 buffer.putUnsafe(tag_key); 1334 buffer.putUnsafe!char('f'); 1335 buffer.putUnsafe!float(float_value); 1336 } 1337 break; 1338 case 55: 1339 #line 326 "sam_alignment.rl" 1340 { 1341 { 1342 auto data = cast(ubyte[])(line[tagvalue_beg .. p - line.ptr]); 1343 buffer.capacity = buffer.length + 4 + data.length; 1344 buffer.putUnsafe(tag_key); 1345 buffer.putUnsafe!char('Z'); 1346 buffer.putUnsafe(data); 1347 buffer.putUnsafe!ubyte(0); 1348 } 1349 } 1350 break; 1351 case 56: 1352 #line 337 "sam_alignment.rl" 1353 { 1354 { 1355 auto data = cast(ubyte[])(line[tagvalue_beg .. p - line.ptr]); 1356 buffer.capacity = buffer.length + 4 + data.length; 1357 buffer.putUnsafe(tag_key); 1358 buffer.putUnsafe!char('H'); 1359 buffer.putUnsafe(data); 1360 buffer.putUnsafe!ubyte(0); 1361 } 1362 } 1363 break; 1364 case 58: 1365 #line 362 "sam_alignment.rl" 1366 { 1367 // here, we assume that compiler is smart enough to move switch out of loop. 
1368 switch (arraytype) { 1369 case 'c': buffer.put(to!byte(int_value)); break; 1370 case 'C': buffer.put(to!ubyte(int_value)); break; 1371 case 's': buffer.put(to!short(int_value)); break; 1372 case 'S': buffer.put(to!ushort(int_value)); break; 1373 case 'i': buffer.put(to!int(int_value)); break; 1374 case 'I': buffer.put(to!uint(int_value)); break; 1375 default: assert(0); 1376 } 1377 { 1378 auto ptr = cast(uint*)(buffer.data.ptr + tag_array_length_offset); 1379 ++*ptr; 1380 } 1381 } 1382 break; 1383 case 59: 1384 #line 379 "sam_alignment.rl" 1385 { 1386 buffer.put!float(float_value); 1387 { 1388 auto ptr = cast(uint*)(buffer.data.ptr + tag_array_length_offset); 1389 ++*ptr; 1390 } 1391 } 1392 break; 1393 case 62: 1394 #line 403 "sam_alignment.rl" 1395 { 1396 buffer.shrink(rollback_size); 1397 p--; {cs = 180; if (true) goto _again;} 1398 } 1399 break; 1400 case 64: 1401 #line 410 "sam_alignment.rl" 1402 { rollback_size = buffer.length; } 1403 break; 1404 #line 1404 "sam_alignment.d" 1405 default: break; 1406 } 1407 } 1408 } 1409 1410 _out: {} 1411 } 1412 1413 #line 481 "sam_alignment.rl" 1414 1415 BamRead read; 1416 read.raw_data = buffer.data[]; 1417 return read; 1418 } 1419 1420 unittest { /* Runs parseAlignmentLine on one well-formed SAM line and on two malformed ones, and compares the fields and tags of the returned record with the values written in the input text. */ 1421 import std.algorithm; 1422 import std.math; 1423 /* Well-formed record: eleven tab-separated columns followed by tags of types i, Z, A and B (one array of c and one array of f). */ 1424 auto line = "ERR016155.15021091\t185\t20\t60033\t25\t66S35M\t=\t60033\t0\tAGAAAAAACTGGAAGTTAATAGAGTGGTGACTCAGATCCAGTGGTGGAAGGGTAAGGGATCTTGGAACCCTATAGAGTTGCTGTGTGCCAGGGCCAGATCC\t#####################################################################################################\tX0:i:1\tX1:i:0\tXC:i:35\tMD:Z:17A8A8\tRG:Z:ERR016155\tAM:i:0\tNM:i:2\tSM:i:25\tXT:A:U\tBQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\tY0:B:c,1,2,3\tY1:B:f,13.263,-3.1415,52.63461"; 1425 /* The header text declares a single reference sequence, named 20; the same header object is reused for all three parses below. */ 1426 auto header = new SamHeader("@SQ\tSN:20\tLN:1234567"); 1427 auto alignment = parseAlignmentLine(line, header); 1428 assert(alignment.name == "ERR016155.15021091"); 1429
assert(equal(alignment.sequence(), "AGAAAAAACTGGAAGTTAATAGAGTGGTGACTCAGATCCAGTGGTGGAAGGGTAAGGGATCTTGGAACCCTATAGAGTTGCTGTGTGCCAGGGCCAGATCC")); 1430 assert(alignment.cigarString() == "66S35M"); 1431 assert(alignment.flag == 185); /* The position and mate position columns both read 60033 in the text; the parsed values are expected to be 60032, i.e. one less. */ 1432 assert(alignment.position == 60032); 1433 assert(alignment.mapping_quality == 25); 1434 assert(alignment.mate_position == 60032); /* Reference 20 is the only header entry, so id 0 is expected; the mate reference column holds an equals sign and is expected to give the same id. */ 1435 assert(alignment.ref_id == 0); 1436 assert(alignment.mate_ref_id == 0); /* Tag values are read back through indexing by the two-letter key and converting with to. */ 1437 assert(to!ubyte(alignment["AM"]) == 0); 1438 assert(to!ubyte(alignment["SM"]) == 25); 1439 assert(to!string(alignment["MD"]) == "17A8A8"); 1440 assert(equal(to!(byte[])(alignment["Y0"]), [1, 2, 3])); /* The float array is compared element-wise with approxEqual rather than exactly. */ 1441 assert(equal!approxEqual(to!(float[])(alignment["Y1"]), [13.263, -3.1415, 52.63461])); 1442 assert(to!char(alignment["XT"]) == 'U'); 1443 1444 import bio.std.hts.bam.reference; 1445 /* NOTE(review): info is not referenced by any later statement in this test - confirm whether it (and the import above) is still needed. */ 1446 auto info = ReferenceSequenceInfo("20", 1234567); 1447 /* Malformed CIGAR column (MZABC): the test only requires that the sequence column is still returned as ACGT. */ 1448 auto invalid_cigar_string = "1\t100\t20\t50000\t30\tMZABC\t=\t50000\t0\tACGT\t####"; 1449 alignment = parseAlignmentLine(invalid_cigar_string, header); 1450 assert(equal(alignment.sequence(), "ACGT")); 1451 /* Malformed quality column (space, exclamation mark, newline) and malformed X3 value (zzz): all three base qualities are expected to be 255, X3 is expected to be absent, and X1 and X4 are expected to keep their values. */ 1452 auto invalid_tag_and_qual = "2\t100\t20\t5\t40\t27M30X5D\t=\t3\t10\tACT\t !\n\tX1:i:7\tX3:i:zzz\tX4:i:5"; 1453 alignment = parseAlignmentLine(invalid_tag_and_qual, header); 1454 assert(alignment.base_qualities == [255, 255, 255]); // i.e. invalid 1455 assert(to!ubyte(alignment["X1"]) == 7); 1456 assert(alignment["X3"].is_nothing); 1457 assert(to!ubyte(alignment["X4"]) == 5); 1458 }