1 /*
2     This file is part of BioD.
3     Copyright (C) 2012-2016    Artem Tarasov <lomereiter@gmail.com>
4 
5     Permission is hereby granted, free of charge, to any person obtaining a
6     copy of this software and associated documentation files (the "Software"),
7     to deal in the Software without restriction, including without limitation
8     the rights to use, copy, modify, merge, publish, distribute, sublicense,
9     and/or sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following conditions:
11 
12     The above copyright notice and this permission notice shall be included in
13     all copies or substantial portions of the Software.
14 
15     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21     DEALINGS IN THE SOFTWARE.
22 
23 */
24 module bio.bam.readrange;
25 
26 import bio.bam.read;
27 import bio.bam.abstractreader;
28 import bio.bam.reader;
29 import bio.core.bgzf.inputstream;
30 import bio.core.bgzf.virtualoffset;
31 
32 import undead.stream;
33 import std.algorithm;
34 import std.system;
35 import std.bitmanip;
36 
/// A BAM read bundled with the virtual offsets at which it starts and ends.
struct BamReadBlock {
    VirtualOffset start_virtual_offset; /// virtual offset of the beginning of the read
    VirtualOffset end_virtual_offset; /// virtual offset of the end of the read
    BamRead read; /// the read itself
    alias read this; /// lets a BamReadBlock be used wherever a BamRead is expected

    /// Returns: a block with the same offsets and a duplicate ($(D read.dup)) of the read.
    @property BamReadBlock dup() const {
        auto duplicated_read = read.dup;
        return BamReadBlock(start_virtual_offset,
                            end_virtual_offset,
                            duplicated_read);
    }
}
49 
/// Iteration policy whose $(D front) carries virtual offsets along with the read.
/// Expects $(D _stream) and $(D _current_record) to be declared in the scope
/// the template is mixed into.
mixin template withOffsets() {
    // Virtual offset recorded right before the current read was loaded.
    private VirtualOffset _start_voffset;

    // Hook invoked by the range before each record is read from the stream.
    private void beforeNextBamReadLoad() {
        _start_voffset = _stream.virtualTell();
    }

    /**
        Returns: the current read together with the virtual offset saved
                 before it was loaded (start) and the stream's virtual
                 offset at the moment of the call (end).
     */
    @property BamReadBlock front() {
        auto end_voffset = _stream.virtualTell();
        return BamReadBlock(_start_voffset, end_voffset, _current_record);
    }
}
68 
/// Iteration policy whose $(D front) is just the current read.
/// Expects $(D _current_record) to be declared in the scope the template
/// is mixed into.
mixin template withoutOffsets() {
    // No offsets are tracked by this policy, so the pre-load hook does nothing.
    private void beforeNextBamReadLoad() {
    }

    /**
        Returns: reference to the current read
     */
    @property ref BamRead front() {
        return _current_record;
    }
}
80 
/**
    Input range over the BAM records stored in a BGZF stream.

    $(D front) return type is determined by $(I IteratePolicy), a mixin
    template that has to provide $(D front) and $(D beforeNextBamReadLoad).
 */
struct BamReadRange(alias IteratePolicy)
{
    /**
        Create new range from BgzfInputStream. The first record is loaded
        immediately.

        Params:
            stream = stream the records are read from
            reader = reader each record gets associated with; may be null

        Throws: ReadException if the first record declares a negative size.
     */
    this(BgzfInputStream stream, BamReader reader=null) {
        _stream = stream;
        _reader = reader;
        _endian_stream = new EndianStream(_stream, Endian.littleEndian);
        readNext();
    }

    /// Returns: true when no more records could be read from the stream.
    bool empty() @property const {
        return _empty;
    }

    mixin IteratePolicy;

    /**
        Loads the next record, or marks the range as empty.

        Throws: ReadException if the record declares a negative size.
     */
    void popFront() {
        readNext();
    }

private:
    BgzfInputStream _stream;
    EndianStream _endian_stream;

    BamReader _reader;

    BamRead _current_record;
    bool _empty = false;

    // Buffer reused for every record when the reader has _seqprocmode set.
    ubyte[] _buffer;

    /*
      Reads next bamRead block from stream.

      Sets _empty when the stream is exhausted. Throws ReadException
      (after setting _empty) when the record size read from the stream
      is negative.
     */
    void readNext() {

        // In fact, on BAM files containing a special EOF BGZF block
        // this condition will be always false!
        //
        // The reason is that we don't want to unpack next block just
        // in order to see if it's an EOF one or not.
        if (_stream.eof()) {
            _empty = true;
            return;
        }

        // In order to get the right virtual offset, we need to do it here.
        version(extraVerbose) {
            // import std.stdio; stderr.writeln("record v.o. = ", _stream.virtualTell());
        }
        beforeNextBamReadLoad();

        // Here's where _empty is really set!
        // The 4-byte record size is read in a loop because readBlock
        // may return fewer bytes than requested.
        ubyte[int.sizeof] tmp = void;
        size_t _read = 0;
        while (_read < int.sizeof) {
            auto _actually_read = _endian_stream.readBlock(tmp.ptr + _read, int.sizeof - _read);
            if (_actually_read == 0) {
                version(development) {
                    import std.stdio;
                    stderr.writeln("[info][bamRead range] empty, read ", _read, " bytes, expected ", int.sizeof);
                }
                _empty = true;
                return;
            }
            _read += _actually_read;
        }

        int block_size = littleEndianToNative!int(tmp);

        version(extraVerbose) {
            import std.stdio;
            stderr.writeln("[uncompressed] record size: ", block_size);
        }

        // A negative size can only come from corrupt input. Left unchecked it
        // would be implicitly converted to a huge size_t in the buffer length
        // comparison and in allocate(), where the capacity check can wrap.
        if (block_size < 0) {
            _empty = true;
            throw new ReadException("invalid BAM record: negative block size");
        }

        ubyte[] data = void;
        if (_reader !is null && _reader._seqprocmode) {
            // Sequential processing mode: every record overwrites the same
            // buffer, which only ever grows.
            if (block_size > _buffer.length)
                _buffer.length = block_size;

            data = _buffer[0 .. block_size];
        } else {
            data = allocate(block_size);
        }

        _stream.readExact(data.ptr, block_size);

        _current_record = BamRead(data);
        _current_record.associateWithReader(_reader);
    }

    private {
        /*
          Returns a slice of `size` bytes taken from the current chunk.
          When the chunk has not enough room left, a new uninitialized
          chunk of max(size, 65536) bytes is started; the previous chunk
          is not reused.
         */
        ubyte[] allocate(size_t size) {
            if (_alloc_buffer_used + size > _alloc_buffer.length) {
                _alloc_buffer = uninitializedArray!(ubyte[])(max(size, 65536));
                _alloc_buffer_used = 0;
            }
            auto result = _alloc_buffer[_alloc_buffer_used .. $][0 .. size];
            _alloc_buffer_used += size;
            return result;
        }
        ubyte[] _alloc_buffer;     // current chunk
        size_t _alloc_buffer_used; // number of bytes of the chunk already handed out
    }
}
189 
/**
    Convenience constructor for $(D BamReadRange).

    Params:
        stream = stream the records are read from
        reader = reader each record gets associated with; may be omitted
                 (null), matching the default of the $(D BamReadRange) constructor

    Returns: lazy range of BamRead/BamReadBlock structs constructed from a given stream.
 */
auto bamReadRange(alias IteratePolicy=withoutOffsets)(BgzfInputStream stream, BamReader reader=null) {
    return BamReadRange!IteratePolicy(stream, reader);
}