/*
    This file is part of BioD.
    Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>

    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*/
module bio.core.region;

import std.conv;

// NOTE(review): the original of this file carried `#line` directives pointing
// into "region.rl", i.e. it looks like generated (Ragel-style) output. The
// directives were dropped here because they no longer match the layout; if the
// file is regenerated, the accumulator clamp in action 1 below has to be
// mirrored in the grammar source as well.

/// Flattened action lists. Each list is stored as a count followed by that many
/// action ids; the transition/EOF tables hold offsets into this array.
/// Offset 0 is the empty list.
static immutable byte[] _region_parser_actions = [
    0, 1, 1, 1, 2, 1, 3, 1,
    4, 2, 0, 1
];

/// Per state: index of the state's first key in `_region_parser_trans_keys`.
static immutable byte[] _region_parser_key_offsets = [
    0, 0, 6, 9, 12, 19, 21, 25
];

/// Per state: first the sorted single-character keys, then the sorted
/// (low, high) inclusive range pairs. The trailing 0 is padding.
static immutable char[] _region_parser_trans_keys = [
    33u, 41u, 43u, 60u, 62u, 126u, 44u, 48u,
    57u, 58u, 33u, 126u, 44u, 33u, 47u, 48u,
    57u, 58u, 126u, 33u, 126u, 44u, 45u, 48u,
    57u, 44u, 48u, 57u, 0
];

/// Per state: number of single-character keys.
static immutable byte[] _region_parser_single_lengths = [
    0, 0, 1, 1, 1, 0, 2, 1
];

/// Per state: number of (low, high) range pairs.
static immutable byte[] _region_parser_range_lengths = [
    0, 3, 1, 1, 3, 1, 1, 1
];

/// Per state: index of the state's first entry in `_region_parser_indicies`.
static immutable byte[] _region_parser_index_offsets = [
    0, 0, 4, 7, 10, 15, 17, 21
];

/// Maps (state, matched key slot) to a transition id. For every state the last
/// slot is the "no key matched" transition.
static immutable byte[] _region_parser_indicies = [
    0, 0, 0, 1, 2, 2, 1, 3,
    0, 1, 5, 4, 5, 4, 1, 4,
    1, 6, 7, 6, 1, 8, 8, 1,
    0
];

/// Per transition id: target state.
static immutable byte[] _region_parser_trans_targs = [
    3, 0, 7, 4, 5, 6, 6, 2,
    7
];

/// Per transition id: offset of its action list in `_region_parser_actions`.
static immutable byte[] _region_parser_trans_actions = [
    0, 0, 9, 3, 0, 9, 1, 5,
    1
];

/// Per state: offset of the action list executed when input ends in that state.
static immutable byte[] _region_parser_eof_actions = [
    0, 0, 0, 3, 3, 3, 5, 7
];

static immutable int region_parser_start = 1;
static immutable int region_parser_first_final = 3;
static immutable int region_parser_error = 0;

static immutable int region_parser_en_region = 1;

/// Result of parsing a region string.
struct Region {
    /// Reference name: the part of the input before the coordinates.
    string reference;
    /// Parsed start coordinate minus one; 0 when the input has no start.
    uint beg;
    /// Parsed end coordinate, unchanged; `uint.max` when the input has no end.
    uint end;
}

/// Returns the slot in `_region_parser_indicies` selected by character `ch`
/// in state `state`: single keys are tried first, then ranges, and if nothing
/// matches the state's trailing "no match" slot is returned.
private uint findRegionTransition(int state, char ch) {
    size_t keyBase = _region_parser_key_offsets[state];
    uint slot = _region_parser_index_offsets[state];

    // Binary search over the sorted single-character keys.
    int count = _region_parser_single_lengths[state];
    if (count > 0) {
        ptrdiff_t lo = 0;
        ptrdiff_t hi = count - 1;
        while (lo <= hi) {
            immutable ptrdiff_t mid = lo + ((hi - lo) >> 1);
            immutable char key = _region_parser_trans_keys[keyBase + mid];
            if (ch < key)
                hi = mid - 1;
            else if (ch > key)
                lo = mid + 1;
            else
                return slot + cast(uint) mid;
        }
        keyBase += count;
        slot += count;
    }

    // Binary search over the sorted (low, high) pairs; `mid` counts pairs.
    count = _region_parser_range_lengths[state];
    if (count > 0) {
        ptrdiff_t lo = 0;
        ptrdiff_t hi = count - 1;
        while (lo <= hi) {
            immutable ptrdiff_t mid = lo + ((hi - lo) >> 1);
            immutable char low = _region_parser_trans_keys[keyBase + 2 * mid];
            immutable char high = _region_parser_trans_keys[keyBase + 2 * mid + 1];
            if (ch < low)
                hi = mid - 1;
            else if (ch > high)
                lo = mid + 1;
            else
                return slot + cast(uint) mid;
        }
        slot += count;
    }

    return slot;
}

/**
    Parses a region string of the form `reference[:beg[-end]]`.

    The reference is a run of characters in the range '!'..'~' whose first
    character is neither '*' nor '='. Coordinates consist of decimal digits;
    commas inside them are skipped (e.g. "1,000"). A ':' that is not followed
    by a digit or a comma is treated as part of the reference name.

    Params:
        str = the region string; `Region.reference` is a slice of it.

    Returns:
        A `Region` whose `beg` is the parsed start minus one and whose `end`
        is the parsed end. Missing coordinates keep the defaults 0 and
        `uint.max`. Malformed input is not reported: parsing stops at the
        first unexpected character and the fields set so far are returned.

    Throws:
        `ConvOverflowException` (from `std.conv.to`) when a coordinate does not
        fit into `uint`, or when the start coordinate is 0 (so that start - 1
        would be negative).
*/
Region parseRegion(string str) {
    // Any value above uint.max + 1 fails both `to!uint(v)` and `to!uint(v - 1)`,
    // so clamping the accumulator here keeps the "too large" outcome while
    // making it impossible for the long to wrap around on very long digit runs.
    enum long overflowMark = cast(long) uint.max + 2;

    Region region;
    region.beg = 0;
    region.end = uint.max;

    long uint_value;   // coordinate currently being accumulated
    size_t pos = 0;    // index of the character being consumed

    // Executes the action list stored at `offset` in `_region_parser_actions`.
    void runActions(size_t offset) {
        immutable size_t first = offset + 1;
        immutable size_t count = cast(size_t) _region_parser_actions[offset];
        foreach (id; _region_parser_actions[first .. first + count]) {
            switch (id) {
                case 0: // start of a number
                    uint_value = 0;
                    break;
                case 1: // next character of a number (digit or separator)
                    immutable char ch = str[pos];
                    if (ch != ',') {
                        uint_value = uint_value * 10 + (ch - '0');
                        if (uint_value > overflowMark)
                            uint_value = overflowMark;
                    }
                    break;
                case 2: // everything consumed so far is the reference name
                    region.reference = str[0 .. pos];
                    break;
                case 3: // start coordinate finished
                    region.beg = to!uint(uint_value - 1);
                    break;
                case 4: // end coordinate finished
                    region.end = to!uint(uint_value);
                    break;
                default:
                    break;
            }
        }
    }

    int cs = region_parser_start;

    while (pos < str.length) {
        immutable trans = _region_parser_indicies[findRegionTransition(cs, str[pos])];
        cs = _region_parser_trans_targs[trans];
        runActions(_region_parser_trans_actions[trans]);

        // Unexpected character: stop without running the end-of-input actions.
        if (cs == region_parser_error)
            return region;

        ++pos;
    }

    // Input exhausted: run whatever the final state still has pending.
    runActions(_region_parser_eof_actions[cs]);

    return region;
}

unittest {
    import std.exception : assertThrown;

    auto region1 = parseRegion("chr1:1,000-2000");
    assert(region1.reference == "chr1");
    assert(region1.beg == 999);
    assert(region1.end == 2000);

    auto region2 = parseRegion("chr2");
    assert(region2.reference == "chr2");
    assert(region2.beg == 0);
    assert(region2.end == uint.max);

    auto region3 = parseRegion("chr3:1,000,000");
    assert(region3.reference == "chr3");
    assert(region3.beg == 999_999);
    assert(region3.end == uint.max);

    // Largest start that still fits after the "minus one" adjustment.
    auto region4 = parseRegion("chr4:4294967296");
    assert(region4.beg == uint.max);

    // 2^64 + 1 used to wrap around to 1 and yield beg == 0.
    assertThrown!ConvOverflowException(parseRegion("chr5:18446744073709551617"));
    assertThrown!ConvOverflowException(parseRegion("chr5:1-18446744073709551617"));
}