1 /*
2     This file is part of BioD.
3     Copyright (C) 2012    Artem Tarasov <lomereiter@gmail.com>
4 
5     Permission is hereby granted, free of charge, to any person obtaining a
6     copy of this software and associated documentation files (the "Software"),
7     to deal in the Software without restriction, including without limitation
8     the rights to use, copy, modify, merge, publish, distribute, sublicense,
9     and/or sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following conditions:
11     
12     The above copyright notice and this permission notice shall be included in
13     all copies or substantial portions of the Software.
14     
15     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21     DEALINGS IN THE SOFTWARE.
22 
23 */
24 module bio.std.hts.bam.baseinfo;
25 
26 import bio.core.base;
27 import bio.core.sequence;
28 
29 import bio.std.hts.bam.read;
30 import bio.std.hts.bam.tagvalue;
31 import bio.std.hts.iontorrent.flowcall;
32 import bio.std.hts.bam.md.core;
33 import bio.std.hts.bam.cigar;
34 
35 import std.range;
36 import std.conv;
37 import std.traits;
38 import std.typecons;
39 import std.typetuple;
40 
41 /// 
42 enum Option
43 {
44     /// adds 'cigar_before' and 'cigar_after' properties
45     cigarExtra, 
46 
47     /// adds 'md_operation', 'md_operation_offset' properties
48     mdCurrentOp,
49 
50     /// adds 'previous_md_operation' property
51     mdPreviousOp,
52 
53     /// adds 'next_md_operation' property
54     mdNextOp
55 }
56 
57 ///
58 struct MixinArg(T, string Tag) {
59     T value;
60     alias value this;
61     alias Tag TagName;
62 }
63 
64 /// Wrapper for arguments to $(D basesWith) function (see below).
65 /// Required to distinguish to which tag each parameter refers.
66 MixinArg!(T, Tag) arg(string Tag, T)(T value) {
67     return MixinArg!(T, Tag)(value);
68 }
69 
70 template staticFilter(alias P, T...)
71 {
72     static if (T.length == 0)
73         alias TypeTuple!() staticFilter;
74     else static if (P!(T[0]))
75         alias TypeTuple!(T[0], staticFilter!(P, T[1..$])) staticFilter;
76     else
77         alias staticFilter!(P, T[1..$]) staticFilter;
78 }
79 
80 template isTag(alias argument)
81 {
82     enum isTag = is(typeof(argument) == string);
83 }
84 
85 template isOption(alias argument)
86 {
87     enum isOption = is(typeof(argument) == Option);
88 }
89 
90 struct PerBaseInfo(R, TagsAndOptions...) {
91 
92     alias staticFilter!(isTag, TagsAndOptions) Tags;
93     alias staticFilter!(isOption, TagsAndOptions) Options;
94 
95     private alias TypeTuple!("CIGAR", Tags) Extensions;
96 
97     // /////////////////////////////////////////////////////////////////////////
98     //
99     // Each 'extension' is a template with name TAGbaseInfo, containing 
100     // a couple of mixin templates:
101     // 
102     // * resultProperties
103     //      These are additional properties provided by the template
104     //
105     // * rangeMethods
106     //      These describe how to proceed to the next base.
107     //      The following methods must be implemented:
108     //      
109     //      - void setup(Args...)(const ref R read, Args args);
110     //          Gets called during range construction. All constructor
111     //          arguments are forwarded, and it's this function which
112     //          is responsible for getting required parameters for this
113     //          particular template.
114     //
115     //      - void populate(Result)(ref Result r);
116     //          Populates fields of the result declared in resultProperties.
117     //          Should run in O(1), just copying a few variables.
118     //          Current base of the result is updated before the call.
119     //
120     //      - void update(const ref R read);
121     //          Encapsulates logic of moving to the next base and updating
122     //          mixin variables correspondingly.
123     //
124     //      - void copy(Range)(const ref Range source, ref Range target);
125     //          Gets called during $(D source.save). Therefore, any ranges
126     //          used in mixin templates must be saved as well at that time.
127     //
128     // /////////////////////////////////////////////////////////////////////////
129 
130     private static string getResultProperties(Exts...)() {
131         char[] result;
132         foreach (ext; Exts) 
133             result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).resultProperties;".dup;
134         return cast(string)result;
135     }
136 
137     static struct Result {
138         /// Actual read base, with strand taken into account.
139         Base base;
140         alias base this;
141 
142         string opCast(T)() if (is(T == string))
143         {
144             return to!string(base);
145         }
146 
147         bool opEquals(T)(T base) const
148             if (is(Unqual!T == Base)) 
149         {
150             return this.base == base;
151         }
152 
153         bool opEquals(T)(T result) const
154             if (is(Unqual!T == Result))
155         {
156             return this == result;
157         }
158 
159         bool opEquals(T)(T base) const
160             if (is(Unqual!T == char) || is(Unqual!T == dchar))
161         {
162             return this.base == base;
163         }
164 
165         mixin(getResultProperties!Extensions());
166     }
167 
168     private static string getRangeMethods(Exts...)() {
169         char[] result;
170         foreach (ext; Exts)
171             result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).rangeMethods " ~ ext ~ ";".dup;
172         return cast(string)result;
173     }
174 
175     mixin(getRangeMethods!Extensions());
176 
177     private void setup(string tag, Args...)(R read, Args args) {
178         mixin(tag ~ ".setup(read, args);");
179     }
180 
181     private void populate(string tag)(ref Result r) {
182         mixin(tag ~ ".populate(r);");
183     }
184 
185     private void update(string tag)() {
186         mixin(tag ~ ".update(_read);");
187     }
188 
189     private void copy(string tag)(ref typeof(this) other) {
190         mixin(tag ~ ".copy(this, other);");
191     }
192 
193     this(Args...)(R read, Args args) {
194         _read = read;
195         _rev = read.is_reverse_strand;
196         _seq = reversableRange!complementBase(read.sequence, _rev);
197 
198         foreach (t; Extensions) {
199             setup!t(read, args);
200         }
201     }
202 
203     bool empty() @property {
204         return _seq.empty;
205     }
206 
207     /// Allows to construct front element in-place, avoiding a copy.
208     void constructFront(Result* addr)
209     {
210         addr.base = _seq.front;
211         foreach (t; Extensions)
212             populate!t(*addr);
213     }
214 
215     Result front() @property {
216         Result r = void;
217         r.base = _seq.front;
218         foreach (t; Extensions)
219             populate!t(r);
220         return r;
221     }
222 
223     void popFront() {
224         moveToNextBase();
225     }
226 
227     PerBaseInfo save() @property {
228         PerBaseInfo r = void;
229         r._read = _read.dup;
230         r._seq = _seq.save;
231         r._rev = _rev;
232         foreach (t; Extensions)
233             copy!t(r);
234         return r;
235     }
236 
237     ref PerBaseInfo opAssign(PerBaseInfo other) {
238         _read = other._read;
239         _seq = other._seq.save;
240         _rev = other._rev;
241         foreach (t; Extensions)
242             other.copy!t(this);
243         return this;
244     }
245 
246     private void moveToNextBase() {
247 
248         foreach (t; Extensions) {
249             update!t();
250         }
251 
252         _seq.popFront();
253     }
254 
255     /// Returns true if the read is reverse strand,
256     /// and false otherwise.
257     bool reverse_strand() @property const {
258         return _rev;
259     }
260 
261     private {
262         bool _rev = void;
263         R _read = void;
264         ReversableRange!(complementBase, typeof(_read.sequence)) _seq = void;
265     }
266 }
267 
268 ///
269 ///  Collect per-base information from available tags. 
270 ///  Use $(D arg!TagName) to pass a parameter related to a particular tag.
271 ///
272 ///  Example:
273 ///
274 /// basesWith!"FZ"(arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence));
275 ///
276 template basesWith(TagsAndOptions...) {
277     auto basesWith(R, Args...)(R read, Args args) {
278         return PerBaseInfo!(R, TagsAndOptions)(read, args);
279     }
280 }
281 
282 /// Provides additional property $(D reference_base)
283 template MDbaseInfo(R, Options...) {
284 
285     mixin template resultProperties() {
286     
287         enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1;
288         enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1;
289         enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1;
290 
291         /// If current CIGAR operation is reference consuming,
292         /// returns reference base at this position, otherwise
293         /// returns '-'.
294         ///
295         /// If read is on '-' strand, the result will be
296         /// complementary base.
297         char reference_base() @property const {
298             return _ref_base;
299         }
300 
301         private char _ref_base = void;
302 
303         static if (MdPreviousOp)
304         {
305             private Nullable!MdOperation _previous_md_operation = void;
306 
307             /// Previous MD operation
308             Nullable!MdOperation previous_md_operation() @property {
309                 return _previous_md_operation;
310             }
311         }
312 
313         static if (MdCurrentOp)
314         {
315 
316             private MdOperation _current_md_operation = void;
317             private uint _current_md_operation_offset = void;
318 
319             /// Current MD operation
320             MdOperation md_operation() @property {
321                 return _current_md_operation;
322             }
323 
324             /// If current MD operation is match, returns how many bases
325             /// have matched before the current base. Otherwise returns 0.
326             uint md_operation_offset() @property const {
327                 return _current_md_operation_offset;
328             }
329         }
330 
331         static if (MdNextOp)
332         {
333             private Nullable!MdOperation _next_md_operation = void;
334             /// Next MD operation
335             Nullable!MdOperation next_md_operation() @property {
336                 return _next_md_operation;
337             }
338         }
339     }
340 
341     mixin template rangeMethods() {
342 
343         enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1;
344         enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1;
345         enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1;
346 
347         private {
348             ReversableRange!(reverseMdOp, MdOperationRange) _md_ops = void;
349             uint _match; // remaining length of current match operation
350             MdOperation _md_front = void;
351 
352             static if (MdPreviousOp)
353             {
354                 Nullable!MdOperation _previous_md_op;
355                 bool _md_front_is_initialized;
356             }
357         }
358 
359         private void updateMdFrontVariable()
360         {
361             static if (MdPreviousOp)
362             {
363                 if (_md_front_is_initialized)
364                     _previous_md_op = _md_front;
365 
366                 _md_front_is_initialized = true;
367             }
368 
369             _md_front = _md_ops.front;
370             _md_ops.popFront();
371         }
372 
373         void setup(Args...)(const ref R read, Args args)
374         {
375             auto md = read["MD"];
376             auto md_str = *(cast(string*)&md);
377             _md_ops = reversableRange!reverseMdOp(mdOperations(md_str),
378                                                   read.is_reverse_strand);
379          
380             while (!_md_ops.empty)
381             {
382                 updateMdFrontVariable();
383                 if (!_md_front.is_deletion) {
384                     if (_md_front.is_match) {
385                         _match = _md_front.match;
386                     }
387                     break;
388                 }
389             }
390         }
391 
392         void populate(Result)(ref Result result)
393         {
394             if (!current_cigar_operation.is_reference_consuming)
395             {
396                 result._ref_base = '-';
397                 return;
398             }
399 
400             MdOperation op = _md_front;
401             if (op.is_mismatch)
402                 result._ref_base = op.mismatch.asCharacter;
403             else if (op.is_match) {
404                 result._ref_base = result.base.asCharacter;
405             }
406             else assert(0);
407 
408             static if (MdPreviousOp)
409             {
410                 if (_previous_md_op.isNull)
411                     result._previous_md_operation.nullify();
412                 else
413                     result._previous_md_operation = _previous_md_op.get;
414             }
415 
416             static if (MdCurrentOp)
417             {
418 
419                 result._current_md_operation = op;
420                 result._current_md_operation_offset = _md_front.match - _match;
421             }
422 
423             static if (MdNextOp)
424             {
425                 if (_md_ops.empty)
426                     result._next_md_operation.nullify();
427                 else
428                     result._next_md_operation = _md_ops.front;
429             }
430         }
431 
432         void update(const ref R read)
433         {
434             if (!current_cigar_operation.is_reference_consuming)
435                 return;
436 
437             if (_md_front.is_mismatch)
438             {
439                 if (_md_ops.empty)
440                     return;
441 
442                 updateMdFrontVariable();
443             }
444             else if (_md_front.is_match)
445             {
446                 --_match;
447                 if (_match == 0 && !_md_ops.empty) {
448                     updateMdFrontVariable();
449                 }
450             }
451             else assert(0);
452 
453             while (_md_front.is_deletion) {
454                 if (_md_ops.empty)
455                     return;
456 
457                 updateMdFrontVariable();
458             }
459 
460             if (_match == 0 && _md_front.is_match)
461                 _match = _md_front.match;
462         }
463 
464         void copy(Range)(ref Range source, ref Range target)
465         {
466             target.MD._md_ops = source.MD._md_ops.save;
467             target.MD._md_front = source.MD._md_front;
468 
469             static if (MdPreviousOp)
470             {
471                 if (source.MD._previous_md_op.isNull)
472                     target.MD._previous_md_op.nullify();
473                 else
474                     target.MD._previous_md_op = source.MD._previous_md_op.get;
475                 target.MD._md_front_is_initialized = source.MD._md_front_is_initialized;
476             }
477         }
478     }
479 }
480 
481 /// Provides additional property $(D flow_call).
482 template FZbaseInfo(R, Options...) {
483 
484     mixin template resultProperties() {
485         /// Current flow call
486         ReadFlowCall flow_call() @property const {
487             return _flow_call;
488         }
489 
490         private {
491             ReadFlowCall _flow_call;
492         }
493     }
494 
495     mixin template rangeMethods() {
496 
497         private {
498             ReadFlowCallRange!(BamRead.SequenceResult) _flow_calls = void;
499             ReadFlowCall _current_flow_call = void;
500             ushort _at = void;
501 
502             debug {
503                 string _read_name;
504             }
505         }
506 
507         void setup(Args...)(const ref R read, Args args) 
508         {
509             string flow_order = void;
510             string key_sequence = void;
511 
512             debug {
513                 _read_name = read.name.idup;
514             }
515 
516             enum flowOrderExists = staticIndexOf!(MixinArg!(string, "flowOrder"), Args);
517             enum keySequenceExists = staticIndexOf!(MixinArg!(string, "keySequence"), Args);
518             static assert(flowOrderExists != -1, `Flow order must be provided via arg!"flowOrder"`);
519             static assert(keySequenceExists != -1, `Flow order must be provided via arg!"keySequence"`);
520 
521             foreach (arg; args) {
522                 static if(is(typeof(arg) == MixinArg!(string, "flowOrder")))
523                     flow_order = arg;
524 
525                 static if(is(typeof(arg) == MixinArg!(string, "keySequence")))
526                     key_sequence = arg;
527             }
528 
529             _at = 0;
530 
531             _flow_calls = readFlowCalls(read, flow_order, key_sequence);
532             if (!_flow_calls.empty) {
533                 _current_flow_call = _flow_calls.front;
534             }
535         }
536 
537         void populate(Result)(ref Result result) {
538             result._flow_call = _current_flow_call;
539 
540             debug {
541                 if (result.base != result._flow_call.base) {
542                     import std.stdio;
543                     stderr.writeln("invalid flow call at ", _read_name, ": ", result.position);
544                 }
545             }
546         }
547 
548         void update(const ref R read) 
549         {
550             ++_at;
551             if (_at == _current_flow_call.length) {
552                 _flow_calls.popFront();
553                 if (!_flow_calls.empty) {
554                     _current_flow_call = _flow_calls.front;
555                     _at = 0;
556                 }
557             }
558         }
559 
560         void copy(Range)(ref Range source, ref Range target) {
561             target.FZ._flow_calls = source._flow_calls.save();
562             target.FZ._at = source.FZ._at;
563             target.FZ._current_flow_call = source._current_flow_call;
564 
565             debug {
566                 target._read_name = _read_name;
567             }
568         }
569     }
570 }
571 
572 /// Retrieving flow signal intensities from ZM tags is also available.
573 alias FZbaseInfo ZMbaseInfo;
574 
575 /// Provides additional properties
576 ///     * position
577 ///     * cigar_operation
578 ///     * cigar_operation_offset
579 template CIGARbaseInfo(R, Options...) {
580 
581     mixin template resultProperties() {
582 
583         enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;
584 
585         static if (CigarExtraProperties)
586         {
587             /// Current CIGAR operation
588             CigarOperation cigar_operation() @property {
589                 return _cigar[_operation_index];
590             }
591 
592             /// CIGAR operations before current one
593             auto cigar_before() @property {
594                 return _cigar[0 .. _operation_index];
595             }
596 
597             /// CIGAR operations after current one
598             auto cigar_after() @property {
599                 return _cigar[_operation_index + 1 .. _cigar.length];
600             }
601         }
602         else
603         {
604             /// Current CIGAR operation
605             CigarOperation cigar_operation() @property const {
606                 return _current_cigar_op;
607             }
608         }
609 
610         /// Position of the corresponding base on the reference.
611         /// If current CIGAR operation is not one of 'M', '=', 'X',
612         /// returns the position of the previous mapped base.
613         uint position() @property const {
614             return _reference_position;
615         }
616 
617         /// Offset in current CIGAR operation, starting from 0.
618         uint cigar_operation_offset() @property const {
619             return _cigar_operation_offset;
620         }
621 
622         private {
623             int _operation_index = void;
624             uint _reference_position = void;
625             uint _cigar_operation_offset = void;
626             static if (CigarExtraProperties)
627             {
628                 ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
629             }
630             else
631             {
632                 CigarOperation _current_cigar_op;
633             }
634         }
635     }
636 
637     mixin template rangeMethods() {
638 
639         enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;
640 
641         private {
642             CigarOperation _current_cigar_op = void;
643 
644             ulong _cur_cig_op_len = void;
645             bool _cur_cig_op_is_ref_cons = void;
646 
647             int _index = void;
648             uint _at = void;
649             uint _ref_pos = void;
650             ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
651         }
652 
653         /// Current CIGAR operation, available to all extensions
654         const(CigarOperation) current_cigar_operation() @property const {
655             return _current_cigar_op;
656         }
657 
658         void setup(Args...)(const ref R read, Args) 
659         {
660             _cigar = reversableRange(read.cigar, read.is_reverse_strand);
661 
662             _index = -1;
663             _ref_pos = reverse_strand ? (read.position + read.basesCovered() - 1)
664                                       : read.position;
665 
666             _moveToNextCigarOperator();
667             assert(_index >= 0);
668         }
669 
670         void populate(Result)(ref Result result) {
671             result._reference_position = _ref_pos;
672             result._cigar_operation_offset = _at;
673             static if (CigarExtraProperties)
674             {
675                 result._cigar = _cigar;
676                 result._operation_index = _index;
677             }
678             else
679             {
680                 result._current_cigar_op = _current_cigar_op;
681             }
682         }
683 
684         void update(const ref R read) 
685         {
686            ++_at;
687 
688            if  (_cur_cig_op_is_ref_cons) {
689                _ref_pos += reverse_strand ? -1 : 1;
690            }
691 
692            if (_at == _cur_cig_op_len) {
693                _moveToNextCigarOperator();
694            }
695         }
696 
697         void copy(Range)(const ref Range source, ref Range target) {
698             target.CIGAR._cigar = source.CIGAR._cigar;
699             target.CIGAR._index = source.CIGAR._index;
700             target.CIGAR._current_cigar_op = source.CIGAR._current_cigar_op;
701             target.CIGAR._cur_cig_op_len = source.CIGAR._cur_cig_op_len;
702             target.CIGAR._cur_cig_op_is_ref_cons = source.CIGAR._cur_cig_op_is_ref_cons;
703             target.CIGAR._at = source.CIGAR._at;
704             target.CIGAR._ref_pos = source.CIGAR._ref_pos;
705         }
706 
707         private void _moveToNextCigarOperator() {
708             _at = 0;
709             for (++_index; _index < _cigar.length; ++_index)
710             {
711                 _current_cigar_op = _cigar[_index];
712                 _cur_cig_op_is_ref_cons = _current_cigar_op.is_reference_consuming;
713                 _cur_cig_op_len = _current_cigar_op.length;
714 
715                 if (_current_cigar_op.is_query_consuming)
716                     break;
717 
718                 if (_cur_cig_op_is_ref_cons)
719                 {
720                     if (reverse_strand)
721                         _ref_pos -= _cur_cig_op_len;
722                     else
723                         _ref_pos += _cur_cig_op_len;
724                 }
725             }
726         }
727     }
728 }