1 module bio.std.hts.bam.md.parse;
2
3 import bio.std.hts.bam.md.operation;
4 import std.ascii;
5 import std.array;
6 import std.algorithm;
7 import std.functional;
8 import std.range;
9 import std.conv;
10 import std.traits;
11
/// Returns bidirectional range of MD operations. Zero matches are skipped.
///
/// The string is tokenized lazily from both ends:
/// $(UL
///   $(LI `^` followed by a run of uppercase letters is a deletion;)
///   $(LI a run of decimal digits is a match of that length;)
///   $(LI any other single character is a mismatch.)
/// )
/// Iterating from the back produces the same operations as iterating from the
/// front, in reverse order.
///
/// Params:
///     md = text of the MD tag to parse
///
/// Throws:
///     whatever `to!uint` throws when a digit run does not fit into `uint`.
auto mdOperations(string md) {

    static struct Result {
        private {
            // Part of the tag that has not been tokenized from either end yet.
            string _md = void;
            // Operation currently exposed by `front`.
            MdOperation _cached_front = void;
            // Operation currently exposed by `back`.
            MdOperation _cached_back = void;
            // Element bookkeeping:
            //   255 - `_cached_front` and `_cached_back` are two distinct
            //         elements (and `_md` may hold more between them);
            //   1   - exactly one element is left, both caches hold it;
            //   0   - the range is empty.
            ubyte _rem = 255;
        }

        this(string md) {
            _md = md;
            if (!cacheFront()) {
                // Nothing to parse at all.
                _rem = 0;
            } else {
                if (!cacheBack()) {
                    // The first operation consumed the whole string,
                    // so it is also the last one.
                    _cached_back = _cached_front;
                    _rem = 1;
                }
            }
        }

        bool empty() @property {
            return _rem == 0;
        }

        Result save() @property {
            Result res = void;
            res._md = _md;
            res._cached_front = _cached_front;
            res._cached_back = _cached_back;
            res._rem = _rem;
            return res;
        }

        ref MdOperation front() @property {
            return _cached_front;
        }

        ref MdOperation back() @property {
            return _cached_back;
        }

        void popFront() {
            if (_md.empty) {
                if (_rem == 255) {
                    // Two cached elements were left; the back one becomes
                    // the single remaining element.
                    _cached_front = _cached_back;
                    _rem = 1;
                } else {
                    _rem = 0;
                }
            } else {
                if (!cacheFront())
                    _rem = 0;
            }
        }

        void popBack() {
            if (_md.empty) {
                if (_rem == 255) {
                    // Two cached elements were left; the front one becomes
                    // the single remaining element.
                    _cached_back = _cached_front;
                    _rem = 1;
                } else {
                    _rem = 0;
                }
            } else {
                if (!cacheBack())
                    _rem = 0;
            }
        }

        // Parses one operation from the beginning of `_md` into
        // `_cached_front` and removes it from `_md`.
        // Returns false if `_md` is empty.
        private bool cacheFront() {
            if (_md.empty)
                return false;

            if (_md[0] == '^') { // deletion, get bases
                _md = _md[1 .. $];
                auto len = countUntil!(not!isUpper)(_md);
                if (len == -1) {
                    // bases run up to the end of the string
                    len = _md.length;
                }
                _cached_front = MdOperation.createDeletion(_md[0 .. len]);
                _md = _md[len .. $];
            } else if (isDigit(_md[0])) { // match, get number
                auto len = countUntil!(not!isDigit)(_md);
                if (len == -1) {
                    // digits run up to the end of the string
                    len = _md.length;
                }
                _cached_front = MdOperation.createMatch(to!uint(_md[0 .. len]));
                _md = _md[len .. $];
            } else { // mismatch
                _cached_front = MdOperation.createMismatch(_md[0]);
                _md = _md[1 .. $];
            }

            return true;
        }

        // Parses one operation from the end of `_md` into `_cached_back`
        // and removes it from `_md`.
        // Returns false if `_md` is empty.
        private bool cacheBack() {
            if (_md.empty)
                return false;

            if (isDigit(_md[$ - 1])) { // match, get number
                auto len = countUntil!(not!isDigit)(retro(_md));
                if (len == -1) {
                    // digits run down to the start of the string
                    len = _md.length;
                }
                _cached_back = MdOperation.createMatch(to!uint(_md[$ - len .. $]));
                _md = _md[0 .. $ - len];
            } else {
                // Length of the trailing run of uppercase bases.
                auto bases = countUntil!(not!isUpper)(retro(_md));
                if (bases == -1) {
                    // Only bases are left, so no '^' can precede them.
                    bases = _md.length;
                }

                // It is a deletion only if '^' sits immediately before the
                // run of bases. Searching for just any earlier '^' would
                // index out of bounds when there is none, and would swallow
                // unrelated operations when digits lie in between.
                if (bases < _md.length && _md[$ - bases - 1] == '^') { // deletion
                    _cached_back = MdOperation.createDeletion(_md[$ - bases .. $]);
                    _md = _md[0 .. $ - bases - 1];
                } else { // mismatch
                    _cached_back = MdOperation.createMismatch(_md[$ - 1]);
                    _md = _md[0 .. $ - 1];
                }
            }

            return true;
        }
    }

    // Zero-length matches carry no information and are filtered out.
    static bool isZeroMatch(MdOperation op) {
        return op.type == MdOperationType.Match &&
               op.match == 0;
    }

    return filterBidirectional!(not!isZeroMatch)(Result(md));
}
144
/// Name for the range type that mdOperations returns.
alias MdOperationRange = ReturnType!mdOperations;
147
unittest {

    import std.algorithm;

    import std.stdio;

    // Short local name for the operation factory type.
    alias Op = MdOperation;

    // Front-to-back traversal of `md` must produce exactly `expected`.
    static void checkForward(string md, MdOperation[] expected) {
        assert(equal(mdOperations(md), expected));
    }

    // Back-to-front traversal of `md` must produce `expected` reversed.
    static void checkBackward(string md, MdOperation[] expected) {
        assert(equal(retro(mdOperations(md)), retro(expected)));
    }

    // A single match.
    checkForward("86", [Op.createMatch(86)]);

    // The leading zero-length match is skipped.
    checkForward("0G81",
                 [Op.createMismatch('G'),
                  Op.createMatch(81)]);

    // Single-base deletion between two matches.
    checkForward("62^T28",
                 [Op.createMatch(62),
                  Op.createDeletion("T"),
                  Op.createMatch(28)]);

    // Mixed operations, traversed from the back.
    checkBackward("3C6C0A13^A4C2",
                  [Op.createMatch(3),
                   Op.createMismatch('C'),
                   Op.createMatch(6),
                   Op.createMismatch('C'),
                   Op.createMismatch('A'),
                   Op.createMatch(13),
                   Op.createDeletion("A"),
                   Op.createMatch(4),
                   Op.createMismatch('C'),
                   Op.createMatch(2)]);

    // Multi-base deletion.
    checkForward("27^TTT63",
                 [Op.createMatch(27),
                  Op.createDeletion("TTT"),
                  Op.createMatch(63)]);
}