1 /*
2     New style BAM reader. This file is part of Sambamba.
3     Copyright (C) 2017,2018 Pjotr Prins <pjotr.prins@thebird.nl>
4 
5     Sambamba is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published
7     by the Free Software Foundation; either version 2 of the License,
8     or (at your option) any later version.
9 
10     Sambamba is distributed in the hope that it will be useful, but
11     WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18     02111-1307 USA
19 
20 */
21 
22 // This is a complete rewrite of Artem Tarasov's original reader.
23 
24 module bio.std.experimental.hts.bam.header;
25 
26 /*
27 import std.conv;
28 import core.stdc.stdio: fopen, fread, fclose;
29 import std.typecons;
30 import std.bitmanip;
31 
32 import bio.bam.cigar;
33 */
34 
import std.exception;
import std.file;
import std.stdio;
import std.string;
39 
// TODO: find out why this is still imported from the old bio.bam tree,
// and check whether it depends on undead.
42 import bio.std.hts.bam.constants;
43 
44 import bio.std.experimental.hts.bgzf;
45 import bio.std.experimental.hts.bgzf_writer;
46 
// TODO: clarify how these constants differ from the ones in bio.std.hts.bam.constants.
48 import bio.std.experimental.hts.constants; 
49 
/// One reference sequence entry of a BAM header.
struct RefSequence {
  size_d length; // length of the reference sequence (bps)
  string name;   // reference name; fetch_bam_header stores it without the zero terminator
}
54 
/// In-memory BAM header, filled in by fetch_bam_header and written out by
/// write_bam_header.
struct BamHeader {
  string id;          // fetch_bam_header stores BAM_MAGIC here
  string text;        // header text as read from the stream
  RefSequence[] refs; // one entry per reference sequence, in the order they were read

  @disable this(this); // disable copy semantics;
}
62 
/**
   Read a BAM header from `stream` into `header`.

   Reads, in order: the 4-byte magic, the header text length and text, the
   number of reference sequences and, for each reference, the name length,
   the zero-terminated name and the sequence length.

   Params:
     header = overwritten with the parsed header (`id` is set to BAM_MAGIC)
     stream = BGZF stream to read the header from

   Throws: Exception (through `enforce`) when the magic number does not
   match or when a length field read from the stream is out of range.
*/
void fetch_bam_header(ref BamHeader header, ref BgzfStream stream) {
  ubyte[4] ubyte4;
  stream.read(ubyte4);
  enforce(ubyte4 == BAM_MAGIC,"Invalid file format: expected BAM magic number");

  // All length fields come straight from the file, so validate each one
  // before it is used as a read size, a loop bound or a slice bound.
  immutable text_size = stream.read!int();
  enforce(text_size >= 0,"Invalid BAM header: negative header text length");
  immutable text = stream.read!string(text_size);
  header = BamHeader(BAM_MAGIC,text);

  immutable n_refs = stream.read!int();
  enforce(n_refs >= 0,"Invalid BAM header: negative number of reference sequences");
  foreach(int n_ref; 0..n_refs) {
    immutable l_name = stream.read!int();
    // l_name counts the zero terminator, so it must be at least 1;
    // otherwise the slice below would be [0..-1].
    enforce(l_name > 0,"Invalid BAM header: reference name length must be at least 1");
    auto ref_name = stream.read!string(l_name);
    immutable l_ref = stream.read!int(); // length of reference sequence (bps)
    // A negative value would otherwise be converted implicitly into the
    // length field of RefSequence.
    enforce(l_ref >= 0,"Invalid BAM header: negative reference sequence length");
    header.refs ~= RefSequence(l_ref,ref_name[0..l_name-1]); // drop zero terminator
  }
}
83 
/**
   Write `header` to `bw` as a BAM header and flush the current block.

   Writes, in order: the 4-byte magic, the header text length and text, the
   number of reference sequences and, for each reference, the name length
   (including the zero terminator), the name, a zero byte and the sequence
   length.

   Params:
     bw     = BGZF writer receiving the header
     header = header to serialize; `header.id` is not written, BAM_MAGIC is

   Throws: Exception (through `enforce`) when a length does not fit the
   32-bit signed field it is written to. All lengths are checked before the
   first byte is written, so a failure leaves no partial header behind.
*/
void write_bam_header(ref BgzfWriter bw, ref BamHeader header) {
  // Every length below is narrowed to int; reject values that would be
  // silently truncated and produce a corrupt header.
  enforce(header.text.length <= int.max,"BAM header text is too long");
  enforce(header.refs.length <= int.max,"BAM header has too many reference sequences");
  foreach(refseq; header.refs) {
    // +1 for the zero terminator must still fit in an int
    enforce(refseq.name.length < int.max,"BAM reference name is too long");
    enforce(refseq.length <= int.max,"BAM reference sequence is too long");
  }

  ubyte[4] magic = cast(ubyte[])BAM_MAGIC;
  bw.write(magic);
  bw.write!int(cast(int)header.text.length);
  bw.write(header.text);
  auto n_refs = cast(int)header.refs.length;
  bw.write!int(n_refs);
  foreach(int n_ref; 0..n_refs) {
    immutable refseq = header.refs[n_ref];
    bw.write!int(cast(int)(refseq.name.length+1));  // incl. zero terminator
    bw.write(refseq.name);
    bw.write!ubyte(cast(ubyte)'\0');
    bw.write!int(cast(int)refseq.length);
  }
  bw.flush_block();
}