1 module bio.std.hts.bam.md.parse;
2 
3 import bio.std.hts.bam.md.operation;
4 import std.ascii;
5 import std.array;
6 import std.algorithm;
7 import std.functional;
8 import std.range;
9 import std.conv;
10 import std.traits;
11 
/**
 * Parses an MD string into a bidirectional range of MD operations.
 *
 * The string is consumed lazily from both ends:
 *   - a run of decimal digits becomes a match operation,
 *   - '^' followed by uppercase letters becomes a deletion,
 *   - any other single character becomes a mismatch.
 *
 * Matches of length zero are filtered out of the resulting range.
 *
 * Params:
 *     md = text of the MD tag, e.g. "62^T28"
 *
 * Returns: bidirectional range of MdOperation with zero matches skipped.
 *
 * Throws: whatever `to!uint` throws if a digit run does not fit in `uint`.
 */
auto mdOperations(string md) {

    static struct Result {
        private {
            /// Value of `_rem` meaning "two or more operations are left":
            /// `_cached_front` and `_cached_back` are distinct operations and
            /// `_md` holds the not yet parsed text between them.
            enum ubyte moreThanOne = 255;

            string _md = void;                 // unparsed middle part
            MdOperation _cached_front = void;
            MdOperation _cached_back = void;

            /// 0 - range is empty; 1 - exactly one operation is left
            /// (front and back are the same); moreThanOne - see above.
            ubyte _rem = moreThanOne;
        }

        this(string md) {
            _md = md;
            if (!cacheFront()) {
                // nothing to parse at all
                _rem = 0;
            } else if (!cacheBack()) {
                // the first operation consumed the whole string
                _cached_back = _cached_front;
                _rem = 1;
            }
        }

        bool empty() @property {
            return _rem == 0;
        }

        /// The struct holds only a string slice and plain values,
        /// so a copy is an independent snapshot of the iteration state.
        Result save() @property {
            return this;
        }

        ref MdOperation front() @property {
            return _cached_front;
        }

        ref MdOperation back() @property {
            return _cached_back;
        }

        void popFront() {
            if (!_md.empty) {
                // cannot fail: there is still text to parse
                cacheFront();
            } else if (_rem == moreThanOne) {
                // only the cached back is left, it becomes the front as well
                _cached_front = _cached_back;
                _rem = 1;
            } else {
                _rem = 0;
            }
        }

        void popBack() {
            if (!_md.empty) {
                // cannot fail: there is still text to parse
                cacheBack();
            } else if (_rem == moreThanOne) {
                // only the cached front is left, it becomes the back as well
                _cached_back = _cached_front;
                _rem = 1;
            } else {
                _rem = 0;
            }
        }

        /// Parses one operation from the beginning of `_md` into
        /// `_cached_front`. Returns false if `_md` is empty.
        private bool cacheFront() {
            if (_md.empty)
                return false;

            if (_md[0] == '^') {          // deletion, get bases
                _md = _md[1 .. $];
                auto len = countUntil!(not!isUpper)(_md);
                if (len == -1) {
                    // bases run up to the end of the string
                    len = _md.length;
                }
                _cached_front = MdOperation.createDeletion(_md[0 .. len]);
                _md = _md[len .. $];
            } else if (isDigit(_md[0])) { // match, get number
                auto len = countUntil!(not!isDigit)(_md);
                if (len == -1) {
                    // digits run up to the end of the string
                    len = _md.length;
                }
                _cached_front = MdOperation.createMatch(to!uint(_md[0 .. len]));
                _md = _md[len .. $];
            } else {                      // mismatch
                _cached_front = MdOperation.createMismatch(_md[0]);
                _md = _md[1 .. $];
            }

            return true;
        }

        /// Parses one operation from the end of `_md` into
        /// `_cached_back`. Returns false if `_md` is empty.
        private bool cacheBack() {
            if (_md.empty)
                return false;

            if (isDigit(_md[$ - 1])) { // match, get number
                auto len = countUntil!(not!isDigit)(retro(_md));
                if (len == -1) {
                    // digits run back to the start of the string
                    len = _md.length;
                }
                _cached_back = MdOperation.createMatch(to!uint(_md[$ - len .. $]));
                _md = _md[0 .. $ - len];
                return true;
            }

            // Find where the trailing run of uppercase letters begins.
            size_t start = _md.length;
            while (start > 0 && isUpper(_md[start - 1]))
                --start;

            if (start > 0 && _md[start - 1] == '^') {
                // The run is introduced by a caret: it is a deletion,
                // exactly what cacheFront would produce for the same text.
                _cached_back = MdOperation.createDeletion(_md[start .. $]);
                _md = _md[0 .. start - 1];
            } else {
                // No caret directly before the run, so the last character is
                // a mismatch. This also covers malformed input with several
                // letters in a row, where searching for a caret would either
                // fail (and slice out of bounds) or hit an unrelated one.
                _cached_back = MdOperation.createMismatch(_md[$ - 1]);
                _md = _md[0 .. $ - 1];
            }

            return true;
        }
    }

    /// True for a match operation of length zero.
    static bool isZeroMatch(MdOperation op) {
        return op.type == MdOperationType.Match &&
               op.match == 0;
    }

    return filterBidirectional!(not!isZeroMatch)(Result(md));
}
144 
/// Name for the range type that mdOperations returns
alias MdOperationRange = ReturnType!mdOperations;
147 
unittest {

    import std.algorithm;

    // Verifies that `md` yields `expected` both when the range is walked
    // from the front and when a fresh range is walked from the back.
    static void check(string md, MdOperation[] expected) {
        assert(equal(mdOperations(md), expected),
               "forward traversal mismatch for MD \"" ~ md ~ "\"");
        assert(equal(retro(mdOperations(md)), retro(expected)),
               "backward traversal mismatch for MD \"" ~ md ~ "\"");
    }

    // a single match: front and back are the same operation
    check("86", [MdOperation.createMatch(86)]);

    // leading zero match is skipped
    check("0G81",
          [MdOperation.createMismatch('G'),
           MdOperation.createMatch(81)]);

    // zero matches on both sides leave a single mismatch
    check("0A0", [MdOperation.createMismatch('A')]);

    // single-base deletion
    check("62^T28",
          [MdOperation.createMatch(62),
           MdOperation.createDeletion("T"),
           MdOperation.createMatch(28)]);

    // mismatches, an inner zero match and a deletion together
    check("3C6C0A13^A4C2",
          [MdOperation.createMatch(3),
           MdOperation.createMismatch('C'),
           MdOperation.createMatch(6),
           MdOperation.createMismatch('C'),
           MdOperation.createMismatch('A'),
           MdOperation.createMatch(13),
           MdOperation.createDeletion("A"),
           MdOperation.createMatch(4),
           MdOperation.createMismatch('C'),
           MdOperation.createMatch(2)]);

    // multi-base deletion
    check("27^TTT63",
          [MdOperation.createMatch(27),
           MdOperation.createDeletion("TTT"),
           MdOperation.createMatch(63)]);

    // a lone zero match is filtered out, leaving nothing
    assert(mdOperations("0").empty);
}