1 /*
2     This file is part of BioD.
3     Copyright (C) 2012-2016    Artem Tarasov <lomereiter@gmail.com>
4 
5     Permission is hereby granted, free of charge, to any person obtaining a
6     copy of this software and associated documentation files (the "Software"),
7     to deal in the Software without restriction, including without limitation
8     the rights to use, copy, modify, merge, publish, distribute, sublicense,
9     and/or sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following conditions:
11 
12     The above copyright notice and this permission notice shall be included in
13     all copies or substantial portions of the Software.
14 
15     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21     DEALINGS IN THE SOFTWARE.
22 
23 */
24 module bio.sam.header;
25 
26 import bio.bam.thirdparty.msgpack;
27 import bio.core.utils.format;
28 
29 import std.algorithm;
30 import std.conv;
31 import std.format;
32 import std.json;
33 import std.exception;
34 import std.array;
35 import std.range;
36 import std.traits;
37 import std.stdio : stderr;
38 
39 private {
40 
41     struct Field(string _name, string _abbr, T = string) {
42         enum name = _name;
43         enum abbr = _abbr;
44         alias T FieldType;
45     }
46 
47     mixin template structFields(T...) {
48         static if (!(T.length == 0)) {
49             mixin(T[0].FieldType.stringof ~ " " ~  T[0].name ~ ";");
50             mixin structFields!(T[1..$]);
51         }
52     }
53 
54     string makeSwitchStatements(F...)() {
55         /* certain assumptions about variable names are being made here,
56          * namely, 'record' and 'contents'
57          */
58         char[] result;
59         foreach (t; F) {
60           static if (t.FieldType.stringof == "string")
61             result ~= `case "`~t.abbr~`":`~
62                         `record.`~t.name~`=cast(string)(contents);`~
63                       `break;`.dup;
64           else
65             result ~= `case "`~t.abbr~`":`~
66                         `record.`~t.name~`=to!(`~t.FieldType.stringof~`)(contents);`~
67                       `break;`.dup;
68         }
69         result ~= `default: break;`.dup;
70         return cast(string)result;
71     }
72 
73     auto fields(string header_line) {
74         return splitter(header_line[3..$], '\t');
75     }
76 
77     /*
78         generates 'parse' method which parses given string and
79         fills corresponding struct fields
80      */
81     mixin template parseStaticMethod(string struct_name, Field...) {
82 
83         static auto parse(string line) {
84             mixin(struct_name ~ " record;");
85             foreach (field; fields(line)) {
86                 if (field.length < 3) {
87                     continue;
88                 }
89                 if (field[2] != ':') {
90                     continue;
91                 }
92                 string contents = field[3..$];
93                 switch (field[0..2]) {
94                     mixin(makeSwitchStatements!Field());
95                 }
96             }
97             return record;
98         }
99     }
100 
101     string serializeFields(Field...)() {
102         static if (Field.length > 0) {
103             char[] str = `if (`~Field[0].name~` != `~Field[0].FieldType.stringof~`.init) {`.dup;
104             str ~= `sink.write("\t` ~ Field[0].abbr ~ `:");`.dup;
105             str ~= `sink.write(`~Field[0].name~`);`.dup;
106             str ~= `}`.dup;
107             return str.idup ~ serializeFields!(Field[1..$])();
108         } else {
109             return "";
110         }
111     }
112 
113     /*
114         generates 'toSam' method which converts a struct
115         to SAM header line
116      */
117     mixin template toSamMethod(string line_prefix, Field...) {
118         void toSam(Sink)(auto ref Sink sink) const if (isSomeSink!Sink) {
119             sink.write(line_prefix);
120             mixin(serializeFields!Field());
121         }
122     }
123 
124     string generateHashExpression(Field...)() {
125         char[] res;
126         foreach (t; Field) {
127             res ~= "result = 31 * result + " ~
128                    "typeid(" ~ t.name ~ ").getHash(&" ~ t.name ~ ");".dup;
129         }
130         return res.idup;
131     }
132 
133     mixin template toHashMethod(string struct_name, string id_field, Field...) {
134         static if (id_field != null) {
135             hash_t toHash() const nothrow @safe{
136                 hash_t result = 1;
137                 mixin(generateHashExpression!Field());
138                 return result;
139             }
140 
141             mixin("int opCmp(const ref " ~ struct_name ~ " other) " ~
142                   "    const pure nothrow @safe" ~
143                   "{" ~
144                   "    return " ~ id_field ~ " < other." ~ id_field ~ " ? -1 : " ~
145                   "           " ~ id_field ~ " > other." ~ id_field ~ " ? 1 : 0;" ~
146                   "}");
147         }
148     }
149 
150     string opEqualsExpression(Field...)() {
151         char[] result = Field[0].name ~ " == other.".dup ~ Field[0].name;
152         foreach (t; Field[1..$]) {
153             result ~= " && " ~ t.name ~ " == other.".dup ~ t.name;
154         }
155         return result.idup;
156     }
157 
158     mixin template opEqualsMethod(string struct_name, Field...) {
159         mixin("bool opEquals(const ref " ~ struct_name ~ " other)" ~
160               "    pure const @safe nothrow" ~
161               "{" ~
162               "    return " ~ opEqualsExpression!Field() ~ ";" ~
163               "}");
164 
165         mixin("bool opEquals(" ~ struct_name ~ " other)" ~
166               "    pure const @safe nothrow" ~
167               "{" ~
168               "    return " ~ opEqualsExpression!Field() ~ ";" ~
169               "}");
170     }
171 
172     mixin template getSetIDMethods(string id_field) {
173         static if (id_field != null) {
174             auto getID() const pure nothrow @safe {
175                 mixin("return " ~ id_field ~";");
176             }
177 
178             mixin("void setID(typeof("~id_field~") id) pure nothrow @safe { " ~ id_field ~ " = id; }");
179         }
180     }
181 
182     string generateToMsgpackMethod(Field...)() {
183         char[] method = "packer.beginMap(" ~ to!string(Field.length) ~ ");".dup;
184         foreach (t; Field) {
185             method ~= "packer.pack(`" ~ t.abbr ~ "`);".dup;
186             method ~= "packer.pack(" ~ t.name ~ ");".dup;
187         }
188         return method.idup;
189     }
190 
191     mixin template toMsgpackMethod(Field...) {
192 
193         void toMsgpack(Packer)(ref Packer packer) const {
194             mixin(generateToMsgpackMethod!Field());
195         }
196     }
197 
198     mixin template HeaderLineStruct(string struct_name,
199                                     string line_prefix,
200                                     string id_field,
201                                     Field...)
202     {
203          mixin(`struct `~struct_name~`{
204                     mixin structFields!Field;
205                     mixin parseStaticMethod!(struct_name, Field);
206                     mixin toSamMethod!(line_prefix, Field);
207                     mixin toHashMethod!(struct_name, id_field, Field);
208                     mixin opEqualsMethod!(struct_name, Field);
209                     mixin getSetIDMethods!id_field;
210                     mixin toMsgpackMethod!Field;
211                 }`);
212     }
213 
214 }
215 
216 mixin HeaderLineStruct!("HdLine", "@HD", null,
217           Field!("format_version", "VN"),
218           Field!("sorting_order", "SO"));
219 
220 mixin HeaderLineStruct!("SqLine", "@SQ", "name",
221           Field!("name", "SN"),
222           Field!("length", "LN", uint),
223           Field!("assembly", "AS"),
224           Field!("md5", "M5"),
225           Field!("species", "SP"),
226           Field!("uri", "UR"),
227           Field!("alternate_haplotype", "AH"));
228 
229 mixin HeaderLineStruct!("RgLine", "@RG", "identifier",
230           Field!("identifier", "ID"),
231           Field!("sequencing_center", "CN"),
232           Field!("description", "DS"),
233           Field!("date", "DT"),
234           Field!("flow_order", "FO"),
235           Field!("key_sequence", "KS"),
236           Field!("library", "LB"),
237           Field!("programs", "PG"),
238           Field!("predicted_insert_size", "PI", int),
239           Field!("platform", "PL"),
240           Field!("platform_unit", "PU"),
241           Field!("sample", "SM"),
242           Field!("platform_model", "PM"));
243 
244 mixin HeaderLineStruct!("PgLine", "@PG", "identifier",
245           Field!("identifier", "ID"),
246           Field!("name", "PN"),
247           Field!("command_line", "CL"),
248           Field!("previous_program", "PP"),
249           Field!("program_version", "VN")); // version is a keyword in D
250 
251 unittest {
252     import std.algorithm;
253     import std.stdio;
254 
255     // stderr.writeln("Testing @HD line parsing...");
256     auto hd_line = HdLine.parse("@HD\tVN:1.0\tSO:coordinate");
257     assert(hd_line.format_version == "1.0");
258     assert(hd_line.sorting_order == "coordinate");
259 
260     // stderr.writeln("Testing @SQ line parsing...");
261     auto sq_line = SqLine.parse("@SQ\tSN:NC_007605\tLN:171823\tM5:6743bd63b3ff2b5b8985d8933c53290a\tUR:ftp://.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\tAS:NCBI37\tSP:HUMAN");
262     assert(sq_line.name == "NC_007605");
263     assert(sq_line.length == 171823);
264     assert(sq_line.md5 == "6743bd63b3ff2b5b8985d8933c53290a");
265     assert(sq_line.uri.endsWith("hs37d5.fa.gz"));
266     assert(sq_line.assembly == "NCBI37");
267     assert(sq_line.species == "HUMAN");
268 
269     // stderr.writeln("Testing @RG line parsing...");
270     auto rg_line = RgLine.parse("@RG\tID:ERR016155\tLB:HUMgdtRAGDIAAPE\tSM:HG00125\tPI:488\tCN:BGI\tPL:ILLUMINA\tDS:SRP001294");
271     assert(rg_line.identifier == "ERR016155");
272     assert(rg_line.library == "HUMgdtRAGDIAAPE");
273     assert(rg_line.sample == "HG00125");
274     assert(rg_line.predicted_insert_size == 488);
275     assert(rg_line.sequencing_center == "BGI");
276     assert(rg_line.platform == "ILLUMINA");
277     assert(rg_line.description == "SRP001294");
278 
279     // stderr.writeln("Testing @PG line parsing...");
280     auto pg_line = PgLine.parse("@PG\tID:bam_calculate_bq\tPN:samtools\tPP:bam_recalibrate_quality_scores\tVN:0.1.17 (r973:277)\tCL:samtools calmd -Erb $bam_file $reference_fasta > $bq_bam_file");
281     assert(pg_line.identifier == "bam_calculate_bq");
282     assert(pg_line.name == "samtools");
283     assert(pg_line.previous_program == "bam_recalibrate_quality_scores");
284     assert(pg_line.program_version == "0.1.17 (r973:277)");
285     assert(pg_line.command_line.endsWith("$bq_bam_file"));
286 }
287 
288 // workaround for LDC bug #217
289 struct ValueRange(T) {
290     this(T[string] dict, string[] ids) {
291         _dict = dict;
292         _ids = ids;
293     }
294 
295     private {
296         T[string] _dict;
297         string[] _ids;
298     }
299 
300     ref T front() @property { return _dict[_ids[0]]; }
301     ref T back() @property { return _dict[_ids[$-1]]; }
302     bool empty() @property { return _ids.length == 0; }
303     void popFront() { _ids = _ids[1 .. $]; }
304     void popBack() { _ids = _ids[0 .. $ - 1]; }
305     ref T opIndex(size_t i) { return _dict[_ids[i]]; }
306     size_t length() @property { return _ids.length; }
307     ValueRange save() @property { return ValueRange(_dict, _ids[]); }
308 }
309 
310 /// Common class for storing header lines
311 class HeaderLineDictionary(T) {
312 
313     invariant() {
314         assert(_index_to_id.length == _dict.length);
315         assert(_id_to_index.length == _dict.length);
316         /*
317         foreach(id, index; _id_to_index) {
318             assert(_index_to_id[index] == id);
319         }
320         */
321     }
322 
323     ///
324     ref inout(T) opIndex(string id) inout {
325         return _dict[id];
326     }
327 
328     ///
329     void opIndexAssign(T line, string id) {
330         _dict[id] = line;
331     }
332 
333     ///
334     const(T)* opIn_r(string id) const {
335         return id in _dict;
336     }
337 
338     /// Append a line
339     bool add(T line) {
340         auto id = line.getID();
341         if (id !in _dict) {
342             _dict[id] = line;
343             _id_to_index[id] = _index_to_id.length;
344             _index_to_id ~= id;
345             return true;
346         }
347         return false;
348     }
349 
350     /// Remove a line with identifier $(D id).
351     bool remove(string id) {
352         if (id in _dict) {
353             auto old_len = _dict.length;
354 
355             for (auto j = _id_to_index[id]; j < old_len - 1; ++j) {
356                 _index_to_id[j] = _index_to_id[j + 1];
357                 _id_to_index[_index_to_id[j]] = j;
358             }
359 
360             _index_to_id.length = _index_to_id.length - 1;
361 
362             _dict.remove(id);
363             _id_to_index.remove(id);
364 
365             return true;
366         }
367 
368         return false;
369     }
370 
371     ///
372     int opApply(scope int delegate(ref T line) dg) {
373         foreach (size_t i; 0 .. _dict.length) {
374             auto res = dg(_dict[_index_to_id[i]]);
375             if (res != 0) {
376                 return res;
377             }
378         }
379         return 0;
380     }
381 
382     ///
383     int opApply(scope int delegate(T line) dg) const {
384         foreach (size_t i; 0 .. _dict.length) {
385             auto res = dg(_dict[_index_to_id[i]]);
386             if (res != 0) {
387                 return res;
388             }
389         }
390         return 0;
391     }
392 
393     ///
394     int opApply(scope int delegate(ref size_t index, ref T line) dg) {
395         foreach (size_t i; 0 .. _dict.length) {
396             auto res = dg(i, _dict[_index_to_id[i]]);
397             if (res != 0) {
398                 return res;
399             }
400         }
401         return 0;
402     }
403 
404     /// Clear the dictionary
405     void clear() {
406         _dict = null;
407         _id_to_index = null;
408         _index_to_id.length = 0;
409     }
410 
411     static assert(isRandomAccessRange!(ValueRange!T));
412 
413     /// Returns: range of lines
414     ValueRange!T values() @property {
415         return ValueRange!T(_dict, _index_to_id);
416     }
417 
418     /// Returns: number of stored lines
419     size_t length() @property const {
420         return _dict.length;
421     }
422 
423     protected {
424         T[string] _dict;
425         string[] _index_to_id;
426         size_t[string] _id_to_index;
427     }
428 }
429 
430 /// Dictionary of @SQ lines.
431 final class SqLineDictionary : HeaderLineDictionary!SqLine
432 {
433     ///
434     ref inout(SqLine) getSequence(size_t index) inout {
435         return _dict[_index_to_id[index]];
436     }
437 
438     ///
439     int getSequenceIndex(string sequence_name) {
440         size_t* ind = sequence_name in _id_to_index;
441         return (ind is null) ? -1 : cast(int)(*ind);
442     }
443 }
444 
445 /// Dictionary of @RG lines
446 alias HeaderLineDictionary!RgLine RgLineDictionary;
447 
448 /// Dictionary of @PG lines
449 alias HeaderLineDictionary!PgLine PgLineDictionary;
450 
451 /// Represents SAM header
452 class SamHeader {
453 
454     ///
455     enum DEFAULT_FORMAT_VERSION = "1.3";
456 
457     /// Construct empty SAM header
458     this() {
459         sequences = new SqLineDictionary();
460         read_groups = new RgLineDictionary();
461         programs = new PgLineDictionary();
462 
463         format_version = DEFAULT_FORMAT_VERSION;
464     }
465 
466     /// Parse SAM header given in plain text.
467     this(string header_text) {
468         read_groups = new RgLineDictionary();
469         programs = new PgLineDictionary();
470         format_version = DEFAULT_FORMAT_VERSION;
471 
472         import core.memory;
473         core.memory.GC.disable();
474 
475         bool parsed_first_line = false;
476         size_t n_sq_lines = 0;
477 
478         foreach (line; splitter(header_text, '\n')) {
479             if (line.length < 3) {
480                 continue;
481             }
482             if (!parsed_first_line && line[0..3] == "@HD") {
483                 auto header_line = HdLine.parse(line);
484                 if (header_line.sorting_order.length > 0) {
485                     try {
486                         sorting_order = to!SortingOrder(header_line.sorting_order);
487                     } catch (ConvException e) {
488                         sorting_order = SortingOrder.unknown;
489                         // FIXME: should we do that silently?
490                     }
491                 } else {
492                     sorting_order = SortingOrder.unknown;
493                 }
494                 format_version = header_line.format_version;
495             }
496             enforce(line[0] == '@', "Header lines must start with @");
497             switch (line[1]) {
498                 case 'S':
499                     enforce(line[2] == 'Q');
500                     ++n_sq_lines;
501                     break;
502                 case 'R':
503                     enforce(line[2] == 'G');
504                     auto rg_line = RgLine.parse(line);
505                     if (!read_groups.add(rg_line)) {
506                         stderr.writeln("duplicating @RG line ",  rg_line.identifier);
507                     }
508                     break;
509                 case 'P':
510                     enforce(line[2] == 'G');
511                     auto pg_line = PgLine.parse(line);
512                     if (!programs.add(pg_line)) {
513                         stderr.writeln("duplicating @PG line ", pg_line.identifier);
514                     }
515                     break;
516                 case 'H':
517                     enforce(line[2] == 'D');
518                     break;
519                 case 'C':
520                     enforce(line[2] == 'O');
521                     comments ~= line[4..$];
522                     break;
523                 default:
524                     assert(0);
525             }
526 
527             parsed_first_line = true;
528         }
529 
530         if (!parsed_first_line) {
531             format_version = DEFAULT_FORMAT_VERSION;
532         }
533 
534         _header_text = header_text;
535         if (n_sq_lines <= 1000000)
536             _parseSqLines(); // parse immediately for typical files
537 
538         core.memory.GC.enable();
539     }
540 
541     /// Format version
542     string format_version;
543 
544     /// Sorting order
545     SortingOrder sorting_order = SortingOrder.unknown;
546 
547     /// Dictionary of @SQ lines.
548     /// Removal is not allowed, you can only replace the whole dictionary.
549     SqLineDictionary sequences() @property {
550         if (_sequences is null)
551             _parseSqLines();
552         return _sequences;
553     }
554 
555     void sequences(SqLineDictionary dict) @property {
556         _sequences = dict;
557     }
558 
559     private SqLineDictionary _sequences;
560     private string _header_text;
561     private void _parseSqLines() {
562         import core.memory;
563         core.memory.GC.disable();
564 
565         _sequences = new SqLineDictionary();
566 
567         foreach (line; splitter(_header_text, '\n')) {
568             if (line.length < 3)
569                 continue;
570             if (line[0 .. 3] != "@SQ")
571                 continue;
572 
573             auto sq_line = SqLine.parse(line);
574             if (!_sequences.add(sq_line)) {
575                 stderr.writeln("duplicating @SQ line ",  sq_line.name);
576             }
577         }
578 
579         _header_text = null;
580         core.memory.GC.enable();
581     }
582 
583     /// Dictionary of @RG lines
584     RgLineDictionary read_groups;
585 
586     /// Dictionary of @PG lines
587     PgLineDictionary programs;
588 
589     /// Array of @CO lines
590     string[] comments;
591 
592     /// Zero-based index of sequence.
593     /// If such sequence does not exist in the header, returns -1.
594     int getSequenceIndex(string sequence_name) {
595         return sequences.getSequenceIndex(sequence_name);
596     }
597 
598     ///
599     SqLine getSequence(size_t index) {
600         return sequences.getSequence(index);
601     }
602 
603     /// Get header text representation in SAM format (includes trailing '\n')
604     string text() @property {
605         return to!string(this);
606     }
607 
608     /// Header text representation in SAM ("%s") or JSON format ("%j").
609     /// $(BR)
610     /// Includes trailing '\n'.
611     void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) {
612         if (fmt.spec == 's')
613             toSam(sink);
614         else if (fmt.spec == 'j')
615             toJson(sink);
616         else
617             throw new FormatException("unknown format specifier");
618     }
619 
620     void toSam(Sink)(auto ref Sink sink) if (isSomeSink!Sink) {
621         sink.write("@HD\tVN:");
622         sink.write(format_version);
623         if (sorting_order != SortingOrder.unknown) {
624             sink.write("\tSO:");
625             sink.write(to!string(sorting_order));
626         }
627         sink.write('\n');
628 
629         for (size_t i = 0; i < sequences.length; i++) {
630             auto sq_line = getSequence(i);
631             sq_line.toSam(sink);
632             sink.write('\n');
633         }
634 
635         foreach (rg_line; read_groups) {
636             rg_line.toSam(sink);
637             sink.write('\n');
638         }
639 
640         foreach (pg_line; programs) {
641             pg_line.toSam(sink);
642             sink.write('\n');
643         }
644 
645         foreach (comment; comments) {
646             sink.write("@CO\t");
647             sink.write(comment);
648             sink.write('\n');
649         }
650     }
651 
652     void toJson(Sink)(auto ref Sink sink) if (isSomeSink!Sink) {
653         JSONValue[string] result;
654 
655         result["format_version"] = format_version;
656 
657         if (sorting_order != SortingOrder.unknown) {
658           result["sorting_order"] = sorting_order.to!string;
659         }
660 
661         auto tmp = new JSONValue[sequences.length];
662         for (auto i = 0; i < sequences.length; i++) {
663           auto line = getSequence(i);
664           JSONValue[string] sq;
665           sq["sequence_name"] = line.name;
666           sq["sequence_length"] = line.length;
667           sq["assembly"] = line.assembly;
668           sq["md5"] = line.md5;
669           sq["species"] = line.species;
670           sq["uri"] = line.uri;
671           tmp[i].object = sq;
672         }
673         result["sq_lines"] = tmp.dup;
674         tmp = null;
675 
676         auto tmp2 = new JSONValue[read_groups.length];
677         foreach (i, line; read_groups) {
678           JSONValue[string] sq;
679           sq["identifier"] = line.identifier;
680           sq["sequencing_center"] = line.sequencing_center;
681           sq["description"] = line.description;
682           sq["date"] = line.date;
683           sq["flow_order"] = line.flow_order;
684           sq["key_sequence"] = line.key_sequence;
685           sq["library"] = line.library;
686           sq["programs"] = line.programs;
687           sq["predicted_insert_size"] = line.predicted_insert_size;
688           sq["platform"] = line.platform;
689           sq["platform_unit"] = line.platform_unit;
690           sq["sample"] = line.sample;
691           tmp2[i].object = sq;
692         }
693         result["rg_lines"] = tmp2;
694         tmp2 = null;
695 
696         auto tmp3 = new JSONValue[programs.length];
697         foreach (i, line; programs) {
698           JSONValue[string] sq;
699           sq["identifier"] = line.identifier;
700           sq["program_name"] = line.name;
701           sq["command_line"] = line.command_line;
702           sq["previous_program"] = line.previous_program;
703           sq["program_version"] = line.program_version;
704           tmp3[i].object = sq;
705         }
706         result["pg_lines"] = tmp3;
707 
708         JSONValue json;
709         json.object = result;
710         static if (__VERSION__ < 2072)
711             sink.write(toJSON(&json));
712         else
713           sink.write(toJSON(json,true));
714     }
715 
716     /// Packs message in the following format:
717     /// $(BR)
718     /// MsgPack array with elements
719     ///   $(OL
720     ///     $(LI format version - string)
721     ///     $(LI sorting order - string)
722     ///     $(LI @SQ lines - array of dictionaries)
723     ///     $(LI @RG lines - array of dictionaries)
724     ///     $(LI @PG lines - array of dictionaries))
725     /// $(BR)
726     /// Dictionary keys are the same as in SAM format.
727     void toMsgpack(Packer)(ref Packer packer) const {
728         enforce(_sequences !is null, "failed to call msgpack");
729         packer.beginArray(5);
730         packer.pack(format_version);
731         packer.pack(to!string(sorting_order));
732         packer.beginArray(_sequences.length);
733         foreach (sq; _sequences)
734             packer.pack(sq);
735         packer.beginArray(read_groups.length);
736         foreach (rg; read_groups)
737             packer.pack(rg);
738         packer.beginArray(programs.length);
739         foreach (pg; programs)
740             packer.pack(pg);
741     }
742 }
743 
744 /// Sorting order
745 enum SortingOrder {
746     unknown,    ///
747     unsorted,   ///
748     coordinate, ///
749     queryname   ///
750 }
751 
752 unittest {
753     auto header = new SamHeader();
754     import std.stdio;
755     assert(header.text == "@HD\tVN:1.3\n");
756 
757     auto sequence = SqLine("abc", 123123);
758     header.sequences.add(sequence);
759     assert(header.text == "@HD\tVN:1.3\n@SQ\tSN:abc\tLN:123123\n");
760 
761     header.sorting_order = SortingOrder.coordinate;
762     header.format_version = "1.2";
763     assert(header.text == "@HD\tVN:1.2\tSO:coordinate\n@SQ\tSN:abc\tLN:123123\n");
764     assert(header.getSequenceIndex("abc") == 0);
765     assert(header.getSequenceIndex("bcd") == -1);
766 
767     header.sequences.clear();
768     sequence = SqLine("bcd", 678);
769     sequence.uri = "http://lorem.ipsum";
770     header.sequences.add(sequence);
771     header.format_version = "1.4";
772     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:bcd\tLN:678\tUR:http://lorem.ipsum\n");
773 
774     header.sequences.add(SqLine("def", 321));
775     assert(header.getSequenceIndex("abc") == -1);
776     assert(header.getSequenceIndex("bcd") == 0);
777     assert(header.getSequenceIndex("def") == 1);
778 
779     header.sequences.remove("bcd");
780     assert(header.getSequenceIndex("abc") == -1);
781     assert(header.getSequenceIndex("bcd") == -1);
782     assert(header.getSequenceIndex("def") == 0);
783 
784     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:def\tLN:321\n");
785 
786     auto dict = new SqLineDictionary();
787     dict.add(SqLine("yay", 111));
788     dict.add(SqLine("zzz", 222));
789 
790     auto zzz = dict["zzz"];     // TODO: make 'dict["zzz"].uri = ...' work
791     zzz.uri = "ftp://nyan.cat";
792     dict["zzz"] = zzz;
793     header.sequences = dict;
794 
795     assert(header.text ==
796       "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:yay\tLN:111\n@SQ\tSN:zzz\tLN:222\tUR:ftp://nyan.cat\n");
797     assert(header.sequences == dict);
798 
799     header.sequences.remove("yay");
800     header.sequences.remove("zzz");
801     header.comments ~= "this is a comment";
802 
803     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@CO\tthis is a comment\n");
804 
805     JSONValue[string] result;
806     result["format_version"] = "1.2";
807     assert(to!string(result) == "[\"format_version\":\"1.2\"]", to!string(result));
808 }