1 /* 2 This file is part of BioD. 3 Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com> 4 5 Permission is hereby granted, free of charge, to any person obtaining a 6 copy of this software and associated documentation files (the "Software"), 7 to deal in the Software without restriction, including without limitation 8 the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 and/or sell copies of the Software, and to permit persons to whom the 10 Software is furnished to do so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in 13 all copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 DEALINGS IN THE SOFTWARE. 22 23 */ 24 module bio.std.hts.bam.baseinfo; 25 26 import bio.core.base; 27 import bio.core.sequence; 28 29 import bio.std.hts.bam.read; 30 import bio.std.hts.bam.tagvalue; 31 import bio.std.hts.iontorrent.flowcall; 32 import bio.std.hts.bam.md.core; 33 import bio.std.hts.bam.cigar; 34 35 import std.range; 36 import std.conv; 37 import std.traits; 38 import std.typecons; 39 import std.typetuple; 40 41 /// 42 enum Option 43 { 44 /// adds 'cigar_before' and 'cigar_after' properties 45 cigarExtra, 46 47 /// adds 'md_operation', 'md_operation_offset' properties 48 mdCurrentOp, 49 50 /// adds 'previous_md_operation' property 51 mdPreviousOp, 52 53 /// adds 'next_md_operation' property 54 mdNextOp 55 } 56 57 /// 58 struct MixinArg(T, string Tag) { 59 T value; 60 alias value this; 61 alias Tag TagName; 62 } 63 64 /// Wrapper for arguments to $(D basesWith) function (see below). 65 /// Required to distinguish to which tag each parameter refers. 66 MixinArg!(T, Tag) arg(string Tag, T)(T value) { 67 return MixinArg!(T, Tag)(value); 68 } 69 70 template staticFilter(alias P, T...) 71 { 72 static if (T.length == 0) 73 alias TypeTuple!() staticFilter; 74 else static if (P!(T[0])) 75 alias TypeTuple!(T[0], staticFilter!(P, T[1..$])) staticFilter; 76 else 77 alias staticFilter!(P, T[1..$]) staticFilter; 78 } 79 80 template isTag(alias argument) 81 { 82 enum isTag = is(typeof(argument) == string); 83 } 84 85 template isOption(alias argument) 86 { 87 enum isOption = is(typeof(argument) == Option); 88 } 89 90 struct PerBaseInfo(R, TagsAndOptions...) { 91 92 alias staticFilter!(isTag, TagsAndOptions) Tags; 93 alias staticFilter!(isOption, TagsAndOptions) Options; 94 95 private alias TypeTuple!("CIGAR", Tags) Extensions; 96 97 // ///////////////////////////////////////////////////////////////////////// 98 // 99 // Each 'extension' is a template with name TAGbaseInfo, containing 100 // a couple of mixin templates: 101 // 102 // * resultProperties 103 // These are additional properties provided by the template 104 // 105 // * rangeMethods 106 // These describe how to proceed to the next base. 107 // The following methods must be implemented: 108 // 109 // - void setup(Args...)(const ref R read, Args args); 110 // Gets called during range construction. All constructor 111 // arguments are forwarded, and it's this function which 112 // is responsible for getting required parameters for this 113 // particular template. 114 // 115 // - void populate(Result)(ref Result r); 116 // Populates fields of the result declared in resultProperties. 117 // Should run in O(1), just copying a few variables. 118 // Current base of the result is updated before the call. 119 // 120 // - void update(const ref R read); 121 // Encapsulates logic of moving to the next base and updating 122 // mixin variables correspondingly. 123 // 124 // - void copy(Range)(const ref Range source, ref Range target); 125 // Gets called during $(D source.save). Therefore, any ranges 126 // used in mixin templates must be saved as well at that time. 127 // 128 // ///////////////////////////////////////////////////////////////////////// 129 130 private static string getResultProperties(Exts...)() { 131 char[] result; 132 foreach (ext; Exts) 133 result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).resultProperties;".dup; 134 return cast(string)result; 135 } 136 137 static struct Result { 138 /// Actual read base, with strand taken into account. 139 Base base; 140 alias base this; 141 142 string opCast(T)() if (is(T == string)) 143 { 144 return to!string(base); 145 } 146 147 bool opEquals(T)(T base) const 148 if (is(Unqual!T == Base)) 149 { 150 return this.base == base; 151 } 152 153 bool opEquals(T)(T result) const 154 if (is(Unqual!T == Result)) 155 { 156 return this == result; 157 } 158 159 bool opEquals(T)(T base) const 160 if (is(Unqual!T == char) || is(Unqual!T == dchar)) 161 { 162 return this.base == base; 163 } 164 165 mixin(getResultProperties!Extensions()); 166 } 167 168 private static string getRangeMethods(Exts...)() { 169 char[] result; 170 foreach (ext; Exts) 171 result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).rangeMethods " ~ ext ~ ";".dup; 172 return cast(string)result; 173 } 174 175 mixin(getRangeMethods!Extensions()); 176 177 private void setup(string tag, Args...)(R read, Args args) { 178 mixin(tag ~ ".setup(read, args);"); 179 } 180 181 private void populate(string tag)(ref Result r) { 182 mixin(tag ~ ".populate(r);"); 183 } 184 185 private void update(string tag)() { 186 mixin(tag ~ ".update(_read);"); 187 } 188 189 private void copy(string tag)(ref typeof(this) other) { 190 mixin(tag ~ ".copy(this, other);"); 191 } 192 193 this(Args...)(R read, Args args) { 194 _read = read; 195 _rev = read.is_reverse_strand; 196 _seq = reversableRange!complementBase(read.sequence, _rev); 197 198 foreach (t; Extensions) { 199 setup!t(read, args); 200 } 201 } 202 203 bool empty() @property { 204 return _seq.empty; 205 } 206 207 /// Allows to construct front element in-place, avoiding a copy. 208 void constructFront(Result* addr) 209 { 210 addr.base = _seq.front; 211 foreach (t; Extensions) 212 populate!t(*addr); 213 } 214 215 Result front() @property { 216 Result r = void; 217 r.base = _seq.front; 218 foreach (t; Extensions) 219 populate!t(r); 220 return r; 221 } 222 223 void popFront() { 224 moveToNextBase(); 225 } 226 227 PerBaseInfo save() @property { 228 PerBaseInfo r = void; 229 r._read = _read.dup; 230 r._seq = _seq.save; 231 r._rev = _rev; 232 foreach (t; Extensions) 233 copy!t(r); 234 return r; 235 } 236 237 ref PerBaseInfo opAssign(PerBaseInfo other) { 238 _read = other._read; 239 _seq = other._seq.save; 240 _rev = other._rev; 241 foreach (t; Extensions) 242 other.copy!t(this); 243 return this; 244 } 245 246 private void moveToNextBase() { 247 248 foreach (t; Extensions) { 249 update!t(); 250 } 251 252 _seq.popFront(); 253 } 254 255 /// Returns true if the read is reverse strand, 256 /// and false otherwise. 257 bool reverse_strand() @property const { 258 return _rev; 259 } 260 261 private { 262 bool _rev = void; 263 R _read = void; 264 ReversableRange!(complementBase, typeof(_read.sequence)) _seq = void; 265 } 266 } 267 268 /// 269 /// Collect per-base information from available tags. 270 /// Use $(D arg!TagName) to pass a parameter related to a particular tag. 271 /// 272 /// Example: 273 /// 274 /// basesWith!"FZ"(arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence)); 275 /// 276 template basesWith(TagsAndOptions...) { 277 auto basesWith(R, Args...)(R read, Args args) { 278 return PerBaseInfo!(R, TagsAndOptions)(read, args); 279 } 280 } 281 282 /// Provides additional property $(D reference_base) 283 template MDbaseInfo(R, Options...) { 284 285 mixin template resultProperties() { 286 287 enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1; 288 enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1; 289 enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1; 290 291 /// If current CIGAR operation is reference consuming, 292 /// returns reference base at this position, otherwise 293 /// returns '-'. 294 /// 295 /// If read is on '-' strand, the result will be 296 /// complementary base. 297 char reference_base() @property const { 298 return _ref_base; 299 } 300 301 private char _ref_base = void; 302 303 static if (MdPreviousOp) 304 { 305 private Nullable!MdOperation _previous_md_operation = void; 306 307 /// Previous MD operation 308 Nullable!MdOperation previous_md_operation() @property { 309 return _previous_md_operation; 310 } 311 } 312 313 static if (MdCurrentOp) 314 { 315 316 private MdOperation _current_md_operation = void; 317 private uint _current_md_operation_offset = void; 318 319 /// Current MD operation 320 MdOperation md_operation() @property { 321 return _current_md_operation; 322 } 323 324 /// If current MD operation is match, returns how many bases 325 /// have matched before the current base. Otherwise returns 0. 326 uint md_operation_offset() @property const { 327 return _current_md_operation_offset; 328 } 329 } 330 331 static if (MdNextOp) 332 { 333 private Nullable!MdOperation _next_md_operation = void; 334 /// Next MD operation 335 Nullable!MdOperation next_md_operation() @property { 336 return _next_md_operation; 337 } 338 } 339 } 340 341 mixin template rangeMethods() { 342 343 enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1; 344 enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1; 345 enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1; 346 347 private { 348 ReversableRange!(reverseMdOp, MdOperationRange) _md_ops = void; 349 uint _match; // remaining length of current match operation 350 MdOperation _md_front = void; 351 352 static if (MdPreviousOp) 353 { 354 Nullable!MdOperation _previous_md_op; 355 bool _md_front_is_initialized; 356 } 357 } 358 359 private void updateMdFrontVariable() 360 { 361 static if (MdPreviousOp) 362 { 363 if (_md_front_is_initialized) 364 _previous_md_op = _md_front; 365 366 _md_front_is_initialized = true; 367 } 368 369 _md_front = _md_ops.front; 370 _md_ops.popFront(); 371 } 372 373 void setup(Args...)(const ref R read, Args args) 374 { 375 auto md = read["MD"]; 376 auto md_str = *(cast(string*)&md); 377 _md_ops = reversableRange!reverseMdOp(mdOperations(md_str), 378 read.is_reverse_strand); 379 380 while (!_md_ops.empty) 381 { 382 updateMdFrontVariable(); 383 if (!_md_front.is_deletion) { 384 if (_md_front.is_match) { 385 _match = _md_front.match; 386 } 387 break; 388 } 389 } 390 } 391 392 void populate(Result)(ref Result result) 393 { 394 if (!current_cigar_operation.is_reference_consuming) 395 { 396 result._ref_base = '-'; 397 return; 398 } 399 400 MdOperation op = _md_front; 401 if (op.is_mismatch) 402 result._ref_base = op.mismatch.asCharacter; 403 else if (op.is_match) { 404 result._ref_base = result.base.asCharacter; 405 } 406 else assert(0); 407 408 static if (MdPreviousOp) 409 { 410 if (_previous_md_op.isNull) 411 result._previous_md_operation.nullify(); 412 else 413 result._previous_md_operation = _previous_md_op.get; 414 } 415 416 static if (MdCurrentOp) 417 { 418 419 result._current_md_operation = op; 420 result._current_md_operation_offset = _md_front.match - _match; 421 } 422 423 static if (MdNextOp) 424 { 425 if (_md_ops.empty) 426 result._next_md_operation.nullify(); 427 else 428 result._next_md_operation = _md_ops.front; 429 } 430 } 431 432 void update(const ref R read) 433 { 434 if (!current_cigar_operation.is_reference_consuming) 435 return; 436 437 if (_md_front.is_mismatch) 438 { 439 if (_md_ops.empty) 440 return; 441 442 updateMdFrontVariable(); 443 } 444 else if (_md_front.is_match) 445 { 446 --_match; 447 if (_match == 0 && !_md_ops.empty) { 448 updateMdFrontVariable(); 449 } 450 } 451 else assert(0); 452 453 while (_md_front.is_deletion) { 454 if (_md_ops.empty) 455 return; 456 457 updateMdFrontVariable(); 458 } 459 460 if (_match == 0 && _md_front.is_match) 461 _match = _md_front.match; 462 } 463 464 void copy(Range)(ref Range source, ref Range target) 465 { 466 target.MD._md_ops = source.MD._md_ops.save; 467 target.MD._md_front = source.MD._md_front; 468 469 static if (MdPreviousOp) 470 { 471 if (source.MD._previous_md_op.isNull) 472 target.MD._previous_md_op.nullify(); 473 else 474 target.MD._previous_md_op = source.MD._previous_md_op.get; 475 target.MD._md_front_is_initialized = source.MD._md_front_is_initialized; 476 } 477 } 478 } 479 } 480 481 /// Provides additional property $(D flow_call). 482 template FZbaseInfo(R, Options...) { 483 484 mixin template resultProperties() { 485 /// Current flow call 486 ReadFlowCall flow_call() @property const { 487 return _flow_call; 488 } 489 490 private { 491 ReadFlowCall _flow_call; 492 } 493 } 494 495 mixin template rangeMethods() { 496 497 private { 498 ReadFlowCallRange!(BamRead.SequenceResult) _flow_calls = void; 499 ReadFlowCall _current_flow_call = void; 500 ushort _at = void; 501 502 debug { 503 string _read_name; 504 } 505 } 506 507 void setup(Args...)(const ref R read, Args args) 508 { 509 string flow_order = void; 510 string key_sequence = void; 511 512 debug { 513 _read_name = read.name.idup; 514 } 515 516 enum flowOrderExists = staticIndexOf!(MixinArg!(string, "flowOrder"), Args); 517 enum keySequenceExists = staticIndexOf!(MixinArg!(string, "keySequence"), Args); 518 static assert(flowOrderExists != -1, `Flow order must be provided via arg!"flowOrder"`); 519 static assert(keySequenceExists != -1, `Flow order must be provided via arg!"keySequence"`); 520 521 foreach (arg; args) { 522 static if(is(typeof(arg) == MixinArg!(string, "flowOrder"))) 523 flow_order = arg; 524 525 static if(is(typeof(arg) == MixinArg!(string, "keySequence"))) 526 key_sequence = arg; 527 } 528 529 _at = 0; 530 531 _flow_calls = readFlowCalls(read, flow_order, key_sequence); 532 if (!_flow_calls.empty) { 533 _current_flow_call = _flow_calls.front; 534 } 535 } 536 537 void populate(Result)(ref Result result) { 538 result._flow_call = _current_flow_call; 539 540 debug { 541 if (result.base != result._flow_call.base) { 542 import std.stdio; 543 stderr.writeln("invalid flow call at ", _read_name, ": ", result.position); 544 } 545 } 546 } 547 548 void update(const ref R read) 549 { 550 ++_at; 551 if (_at == _current_flow_call.length) { 552 _flow_calls.popFront(); 553 if (!_flow_calls.empty) { 554 _current_flow_call = _flow_calls.front; 555 _at = 0; 556 } 557 } 558 } 559 560 void copy(Range)(ref Range source, ref Range target) { 561 target.FZ._flow_calls = source._flow_calls.save(); 562 target.FZ._at = source.FZ._at; 563 target.FZ._current_flow_call = source._current_flow_call; 564 565 debug { 566 target._read_name = _read_name; 567 } 568 } 569 } 570 } 571 572 /// Retrieving flow signal intensities from ZM tags is also available. 573 alias FZbaseInfo ZMbaseInfo; 574 575 /// Provides additional properties 576 /// * position 577 /// * cigar_operation 578 /// * cigar_operation_offset 579 template CIGARbaseInfo(R, Options...) { 580 581 mixin template resultProperties() { 582 583 enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1; 584 585 static if (CigarExtraProperties) 586 { 587 /// Current CIGAR operation 588 CigarOperation cigar_operation() @property { 589 return _cigar[_operation_index]; 590 } 591 592 /// CIGAR operations before current one 593 auto cigar_before() @property { 594 return _cigar[0 .. _operation_index]; 595 } 596 597 /// CIGAR operations after current one 598 auto cigar_after() @property { 599 return _cigar[_operation_index + 1 .. _cigar.length]; 600 } 601 } 602 else 603 { 604 /// Current CIGAR operation 605 CigarOperation cigar_operation() @property const { 606 return _current_cigar_op; 607 } 608 } 609 610 /// Position of the corresponding base on the reference. 611 /// If current CIGAR operation is not one of 'M', '=', 'X', 612 /// returns the position of the previous mapped base. 613 uint position() @property const { 614 return _reference_position; 615 } 616 617 /// Offset in current CIGAR operation, starting from 0. 618 uint cigar_operation_offset() @property const { 619 return _cigar_operation_offset; 620 } 621 622 private { 623 int _operation_index = void; 624 uint _reference_position = void; 625 uint _cigar_operation_offset = void; 626 static if (CigarExtraProperties) 627 { 628 ReversableRange!(identity, const(CigarOperation)[]) _cigar = void; 629 } 630 else 631 { 632 CigarOperation _current_cigar_op; 633 } 634 } 635 } 636 637 mixin template rangeMethods() { 638 639 enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1; 640 641 private { 642 CigarOperation _current_cigar_op = void; 643 644 ulong _cur_cig_op_len = void; 645 bool _cur_cig_op_is_ref_cons = void; 646 647 int _index = void; 648 uint _at = void; 649 uint _ref_pos = void; 650 ReversableRange!(identity, const(CigarOperation)[]) _cigar = void; 651 } 652 653 /// Current CIGAR operation, available to all extensions 654 const(CigarOperation) current_cigar_operation() @property const { 655 return _current_cigar_op; 656 } 657 658 void setup(Args...)(const ref R read, Args) 659 { 660 _cigar = reversableRange(read.cigar, read.is_reverse_strand); 661 662 _index = -1; 663 _ref_pos = reverse_strand ? (read.position + read.basesCovered() - 1) 664 : read.position; 665 666 _moveToNextCigarOperator(); 667 assert(_index >= 0); 668 } 669 670 void populate(Result)(ref Result result) { 671 result._reference_position = _ref_pos; 672 result._cigar_operation_offset = _at; 673 static if (CigarExtraProperties) 674 { 675 result._cigar = _cigar; 676 result._operation_index = _index; 677 } 678 else 679 { 680 result._current_cigar_op = _current_cigar_op; 681 } 682 } 683 684 void update(const ref R read) 685 { 686 ++_at; 687 688 if (_cur_cig_op_is_ref_cons) { 689 _ref_pos += reverse_strand ? -1 : 1; 690 } 691 692 if (_at == _cur_cig_op_len) { 693 _moveToNextCigarOperator(); 694 } 695 } 696 697 void copy(Range)(const ref Range source, ref Range target) { 698 target.CIGAR._cigar = source.CIGAR._cigar; 699 target.CIGAR._index = source.CIGAR._index; 700 target.CIGAR._current_cigar_op = source.CIGAR._current_cigar_op; 701 target.CIGAR._cur_cig_op_len = source.CIGAR._cur_cig_op_len; 702 target.CIGAR._cur_cig_op_is_ref_cons = source.CIGAR._cur_cig_op_is_ref_cons; 703 target.CIGAR._at = source.CIGAR._at; 704 target.CIGAR._ref_pos = source.CIGAR._ref_pos; 705 } 706 707 private void _moveToNextCigarOperator() { 708 _at = 0; 709 for (++_index; _index < _cigar.length; ++_index) 710 { 711 _current_cigar_op = _cigar[_index]; 712 _cur_cig_op_is_ref_cons = _current_cigar_op.is_reference_consuming; 713 _cur_cig_op_len = _current_cigar_op.length; 714 715 if (_current_cigar_op.is_query_consuming) 716 break; 717 718 if (_cur_cig_op_is_ref_cons) 719 { 720 if (reverse_strand) 721 _ref_pos -= _cur_cig_op_len; 722 else 723 _ref_pos += _cur_cig_op_len; 724 } 725 } 726 } 727 } 728 }