1 
2 #line 1 "region.rl"
3 /*
4     This file is part of BioD.
5     Copyright (C) 2012    Artem Tarasov <lomereiter@gmail.com>
6 
7     Permission is hereby granted, free of charge, to any person obtaining a
8     copy of this software and associated documentation files (the "Software"),
9     to deal in the Software without restriction, including without limitation
10     the rights to use, copy, modify, merge, publish, distribute, sublicense,
11     and/or sell copies of the Software, and to permit persons to whom the
12     Software is furnished to do so, subject to the following conditions:
13     
14     The above copyright notice and this permission notice shall be included in
15     all copies or substantial portions of the Software.
16     
17     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23     DEALINGS IN THE SOFTWARE.
24 
25 */
26 module bio.core.region;
27 
28 
29 #line 30 "region.d"
// Generated state-machine tables consumed by parseRegion below.
// Edit region.rl and regenerate rather than changing the numbers by hand.

// Flattened action lists. Entries of _region_parser_trans_actions and
// _region_parser_eof_actions are offsets into this array; at each offset sits
// a count followed by that many action ids, which parseRegion dispatches in a
// switch. Offset 0 holds a count of 0, i.e. "no actions".
static byte[] _region_parser_actions = [
	0, 1, 1, 1, 2, 1, 3, 1, 
	4, 2, 0, 1
];

// Indexed by state: where that state's keys start in _region_parser_trans_keys.
static byte[] _region_parser_key_offsets = [
	0, 0, 6, 9, 12, 19, 21, 25
];

// Character keys for every state. For one state the layout is its single
// keys first (searched one element at a time), followed by its ranges stored
// as (low, high) pairs (searched two elements at a time).
static char[] _region_parser_trans_keys = [
	33u, 41u, 43u, 60u, 62u, 126u, 44u, 48u, 
	57u, 58u, 33u, 126u, 44u, 33u, 47u, 48u, 
	57u, 58u, 126u, 33u, 126u, 44u, 45u, 48u, 
	57u, 44u, 48u, 57u, 0
];

// Indexed by state: number of single keys the state owns.
static byte[] _region_parser_single_lengths = [
	0, 0, 1, 1, 1, 0, 2, 1
];

// Indexed by state: number of (low, high) range pairs the state owns.
static byte[] _region_parser_range_lengths = [
	0, 3, 1, 1, 3, 1, 1, 1
];

// Indexed by state: where that state's entries start in
// _region_parser_indicies. The matched key/range position is added to this
// base; when nothing matches, the slot right after the last key/range is used.
static byte[] _region_parser_index_offsets = [
	0, 0, 4, 7, 10, 15, 17, 21
];

// Maps a per-state key slot to a transition id, which in turn indexes
// _region_parser_trans_targs and _region_parser_trans_actions.
static byte[] _region_parser_indicies = [
	0, 0, 0, 1, 2, 2, 1, 3, 
	0, 1, 5, 4, 5, 4, 1, 4, 
	1, 6, 7, 6, 1, 8, 8, 1, 
	0
];

// Indexed by transition id: the state entered when the transition is taken.
static byte[] _region_parser_trans_targs = [
	3, 0, 7, 4, 5, 6, 6, 2, 
	7
];

// Indexed by transition id: offset into _region_parser_actions of the action
// list to run on that transition; 0 means the transition runs no actions.
static byte[] _region_parser_trans_actions = [
	0, 0, 9, 3, 0, 9, 1, 5, 
	1
];

// Indexed by state: offset into _region_parser_actions of the action list to
// run when the input ends while the machine is in that state.
static byte[] _region_parser_eof_actions = [
	0, 0, 0, 3, 3, 3, 5, 7
];

// State the machine is initialised to by parseRegion.
static int region_parser_start = 1;
// Generated constant; not referenced by the code visible in this file.
static int region_parser_first_final = 3;
// Error state number. NOTE(review): parseRegion compares against the literal 0
// rather than this constant.
static int region_parser_error = 0;

// Generated entry-point constant; not referenced by the code visible in this file.
static int region_parser_en_region = 1;
84 
85 
86 #line 44 "region.rl"
87 
88 
89 import std.conv;
90 
/// A reference name paired with two unsigned interval bounds.
struct Region {
    /// Name of the reference sequence.
    string reference;

    /// Interval bounds. parseRegion stores the parsed start minus one in
    /// `beg` and the parsed end unchanged in `end`.
    uint beg, end;
}
96 
/**
    Parses a region string such as "chr1", "chr1:1,000" or "chr1:1,000-2000".

    The accepted grammar is the one encoded in the `_region_parser_*` tables:
    a reference name, optionally followed by ':' and a start coordinate, and
    optionally by '-' and an end coordinate. Commas inside a number are skipped.

    Params:
        str = the region string

    Returns:
        A Region whose `reference` is a slice of `str`. `beg` is the parsed
        start minus one and `end` is the parsed end; when a coordinate is
        absent, `beg` stays 0 and `end` stays `uint.max`.

        If the state machine rejects the input, no error is reported: the
        function returns the region with whatever fields had been filled in
        up to the offending character.

    Throws:
        std.conv.ConvOverflowException (raised by `to!uint`) when a coordinate
        does not fit into `uint`. This includes a start of 0, because the
        start is decremented before conversion.
*/
Region parseRegion(string str) {
    char* p = cast(char*)str.ptr;   // current input position
    char* pe = p + str.length;      // one past the last input character
    char* eof = pe;                 // the whole input is available up front
    int cs;                         // current state of the machine
    long uint_value;                // number currently being accumulated

    Region region;
    region.beg = 0;
    region.end = uint.max;

    
#line 110 "region.d"
	{
	cs = region_parser_start;
	}

#line 66 "region.rl"
    
#line 117 "region.d"
	{
	int _klen;
	uint _trans;
	byte* _acts;
	uint _nacts;
	char* _keys;

	if ( p == pe )
		goto _test_eof;
	if ( cs == 0 )
		goto _out;
_resume:
	_keys = &_region_parser_trans_keys[_region_parser_key_offsets[cs]];
	_trans = _region_parser_index_offsets[cs];

	// Binary search over the single-character keys of the current state.
	_klen = _region_parser_single_lengths[cs];
	if ( _klen > 0 ) {
		char* _lower = _keys;
		char* _mid;
		char* _upper = _keys + _klen - 1;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + ((_upper-_lower) >> 1);
			if ( (*p) < *_mid )
				_upper = _mid - 1;
			else if ( (*p) > *_mid )
				_lower = _mid + 1;
			else {
				_trans += cast(uint)(_mid - _keys);
				goto _match;
			}
		}
		_keys += _klen;
		_trans += _klen;
	}

	// Binary search over the (low, high) range pairs of the current state.
	_klen = _region_parser_range_lengths[cs];
	if ( _klen > 0 ) {
		char* _lower = _keys;
		char* _mid;
		char* _upper = _keys + (_klen<<1) - 2;
		while (1) {
			if ( _upper < _lower )
				break;

			// "& ~1" keeps _mid on the first element of a pair.
			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
			if ( (*p) < _mid[0] )
				_upper = _mid - 2;
			else if ( (*p) > _mid[1] )
				_lower = _mid + 2;
			else {
				_trans += cast(uint)((_mid - _keys)>>1);
				goto _match;
			}
		}
		// Nothing matched: fall through to the state's default slot.
		_trans += _klen;
	}

_match:
	_trans = _region_parser_indicies[_trans];
	cs = _region_parser_trans_targs[_trans];

	if ( _region_parser_trans_actions[_trans] == 0 )
		goto _again;

	// The action list is stored as a count followed by that many action ids.
	_acts = &_region_parser_actions[_region_parser_trans_actions[_trans]];
	_nacts = cast(uint) *_acts++;
	while ( _nacts-- > 0 )
	{
		switch ( *_acts++ )
		{
	case 0:
#line 29 "region.rl"
	{ uint_value = 0; }
	break;
	case 1:
#line 30 "region.rl"
	{
		// Commas are separators and contribute nothing. Once the value has
		// passed uint.max + 1 further digits are dropped: both the value and
		// value - 1 are already too large for to!uint, so the conversion
		// below still throws, and the long accumulator can never wrap
		// around to a small, seemingly valid number.
		if ((*p) != ',' && uint_value <= uint.max + 1L)
			uint_value = uint_value * 10 + ((*p) - '0');
	}
	break;
	case 2:
#line 33 "region.rl"
	{ region.reference = str[0 .. p - str.ptr]; }
	break;
	case 3:
#line 34 "region.rl"
	{ region.beg = to!uint(uint_value - 1); }
	break;
#line 207 "region.d"
		default: break;
		}
	}

_again:
	// State 0 is the error state: stop consuming input.
	if ( cs == 0 )
		goto _out;
	if ( ++p != pe )
		goto _resume;
	_test_eof: {}
	if ( p == eof )
	{
	// Run the actions attached to ending the input in the current state.
	byte* __acts = &_region_parser_actions[_region_parser_eof_actions[cs]];
	uint __nacts = cast(uint) *__acts++;
	while ( __nacts-- > 0 ) {
		switch ( *__acts++ ) {
	case 2:
#line 33 "region.rl"
	{ region.reference = str[0 .. p - str.ptr]; }
	break;
	case 3:
#line 34 "region.rl"
	{ region.beg = to!uint(uint_value - 1); }
	break;
	case 4:
#line 35 "region.rl"
	{ region.end = to!uint(uint_value); }
	break;
#line 236 "region.d"
		default: break;
		}
	}
	}

	_out: {}
	}

#line 67 "region.rl"

    return region;
}
248 
unittest {
    // Parses `text` and checks each field of the result against the expected values.
    static void expect(string text, string name, uint first, uint last) {
        auto parsed = parseRegion(text);
        assert(parsed.reference == name);
        assert(parsed.beg == first);
        assert(parsed.end == last);
    }

    // Start and end given; commas inside the number are ignored.
    expect("chr1:1,000-2000", "chr1", 999, 2000);

    // Reference only: both bounds keep their defaults.
    expect("chr2", "chr2", 0, uint.max);

    // Start only: the end keeps its default.
    expect("chr3:1,000,000", "chr3", 999_999, uint.max);
}