module bio.std.sff.readrange;

import bio.std.sff.read;
import bio.std.sff.index;
import bio.core.utils.switchendianness;

import std.algorithm;
import contrib.undead.stream;
import std.system;
import std.array;

private {

    // GC used in D is quite bad at allocating lots of objects in a tight loop.
    // The following is a simple way to reduce the number of allocations:
    // arrays are carved out of one large byte chunk, and a new chunk is
    // allocated only when the current one cannot hold the next request.

    /// Chunk that allocateArray currently hands slices out of.
    ubyte[] current_chunk;

    /// Number of bytes of current_chunk that have already been handed out.
    size_t used;

    /// Size in bytes of the next chunk to allocate. Grows (and stays grown)
    /// when a single request is larger than the current value.
    size_t chunk_size = 65_536;

    static this() {
        current_chunk = uninitializedArray!(ubyte[])(chunk_size);
        used = 0;
    }

    /// Returns an uninitialized array of `size` elements taken from the
    /// current chunk, starting a new chunk if the request does not fit.
    ///
    /// The template argument is the array type, e.g. allocateArray!(ushort[]);
    /// T is deduced as its element type.
    ///
    /// The start of the returned slice is rounded up to T.alignof, so that
    /// e.g. a ushort[] requested right after an odd-length char[] does not
    /// begin on an odd address.
    T[] allocateArray(T : T[])(size_t size) {
        immutable size_t n_bytes = size * T.sizeof;

        // Round the first free offset up to the alignment of T.
        // T.alignof is a power of two, so the mask trick is valid.
        immutable size_t start = (used + T.alignof - 1) & ~(T.alignof - 1);

        if (start + n_bytes > chunk_size) {
            // Does not fit: start a new chunk, enlarging the chunk size if
            // this single request is bigger than a whole chunk.
            if (n_bytes > chunk_size)
                chunk_size = n_bytes;

            current_chunk = uninitializedArray!(ubyte[])(chunk_size);
            used = n_bytes;
            // Offset 0 of a freshly allocated chunk needs no extra alignment.
            return cast(T[])current_chunk[0 .. n_bytes];
        }

        used = start + n_bytes;
        return cast(T[])current_chunk[start .. used];
    }
}

/// Input range of SffRead records read sequentially from a stream.
///
/// The first record is fetched in the constructor; each popFront() fetches
/// the next one. The arrays inside every record are slices of memory obtained
/// from allocateArray, and every record gets its own slices.
struct SffReadRange {
    /// Params:
    ///     stream                   = stream to read records from; reading
    ///                                starts at its current position
    ///     number_of_flows_per_read = number of flowgram values stored for
    ///                                each read
    ///     index_location           = offset and length of the index section,
    ///                                which is skipped when the stream
    ///                                position reaches that offset
    this(Stream stream,
         ushort number_of_flows_per_read,
         IndexLocation index_location)
    {
        _stream = stream;
        _n_flows = number_of_flows_per_read;
        _index_loc = index_location;

        _fetchNextRead();
    }

    private {
        Stream _stream;
        ushort _n_flows;
        IndexLocation _index_loc;

        bool _empty;    // set once the stream reports EOF
        SffRead _read;  // record returned by front()

        /// Reads the next record into _read, or sets _empty at end of stream.
        void _fetchNextRead() {
            // The index section may sit between records; jump over it.
            if (_stream.position == _index_loc.offset)
                _stream.seekCur(_index_loc.length);

            if (_stream.eof) {
                _empty = true;
                return;
            }

            _read.file_offset = _stream.position;

            // determine how many bytes to read
            ushort read_header_length = void;
            ushort name_length = void;
            uint number_of_bases = void;

            _stream.read(read_header_length);
            _stream.read(name_length);
            _stream.read(number_of_bases);
            _stream.read(_read.clip_qual_left);
            _stream.read(_read.clip_qual_right);
            _stream.read(_read.clip_adapter_left);
            _stream.read(_read.clip_adapter_right);

            char[] name = allocateArray!(char[])(name_length);
            _stream.readExact(name.ptr, name_length);
            // Skip whatever remains of the read header after the name.
            // NOTE(review): 16 is presumably the byte size of the fixed
            // fields read above - confirm against the SffRead field types.
            _stream.seekCur(read_header_length - 16 - name_length);
            // The buffer is never written to again, so it can be exposed
            // as a string.
            _read.name = cast(string)name;

            // Size of the data section: flowgram values plus three
            // one-byte-per-base arrays.
            size_t data_length = _n_flows * ushort.sizeof + 3 * number_of_bases;

            _read.flowgram_values = allocateArray!(ushort[])(_n_flows);
            _stream.readExact(_read.flowgram_values.ptr, _n_flows * ushort.sizeof);

            // The values were read as raw bytes; they are byte-swapped
            // unless the host is big-endian.
            if (std.system.endian != Endian.bigEndian) {
                foreach (ref value; _read.flowgram_values)
                    switchEndianness(&value, ushort.sizeof);
            }

            _read.flow_index_per_base = allocateArray!(ubyte[])(number_of_bases);
            _stream.readExact(_read.flow_index_per_base.ptr, number_of_bases);

            _read.bases = allocateArray!(char[])(number_of_bases);
            _stream.readExact(_read.bases.ptr, number_of_bases);

            _read.quality_scores = allocateArray!(ubyte[])(number_of_bases);
            _stream.readExact(_read.quality_scores.ptr, number_of_bases);

            // Skip the padding that rounds the data section up to a
            // multiple of 8 bytes.
            if (data_length % 8 > 0)
                _stream.seekCur(8 - (data_length % 8));
        }
    }

    /// True when there are no more records to read.
    bool empty() @property const {
        return _empty;
    }

    /// The most recently fetched record.
    SffRead front() @property {
        return _read;
    }

    /// Advances to the next record.
    void popFront() {
        _fetchNextRead();
    }
}