1 /*
2     This file is part of BioD.
3     Copyright (C) 2012-2016    Artem Tarasov <lomereiter@gmail.com>
4 
5     Permission is hereby granted, free of charge, to any person obtaining a
6     copy of this software and associated documentation files (the "Software"),
7     to deal in the Software without restriction, including without limitation
8     the rights to use, copy, modify, merge, publish, distribute, sublicense,
9     and/or sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following conditions:
11 
12     The above copyright notice and this permission notice shall be included in
13     all copies or substantial portions of the Software.
14 
15     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21     DEALINGS IN THE SOFTWARE.
22 
23 */
24 module bio.sam.header;
25 
26 import bio.bam.thirdparty.msgpack;
27 import bio.core.utils.format;
28 
29 import std.algorithm;
30 import std.conv;
31 import std.format;
32 import std.json;
33 import std.exception;
34 import std.array;
35 import std.range;
36 import std.traits;
37 import std.stdio : stderr;
38 
39 private {
40 
41     struct Field(string _name, string _abbr, T = string) {
42         enum name = _name;
43         enum abbr = _abbr;
44         alias T FieldType;
45     }
46 
47     mixin template structFields(T...) {
48         static if (!(T.length == 0)) {
49             mixin(T[0].FieldType.stringof ~ " " ~  T[0].name ~ ";");
50             mixin structFields!(T[1..$]);
51         }
52     }
53 
54     string makeSwitchStatements(F...)() {
55         /* certain assumptions about variable names are being made here,
56          * namely, 'record' and 'contents'
57          */
58         char[] result;
59         foreach (t; F) {
60           static if (t.FieldType.stringof == "string")
61             result ~= `case "`~t.abbr~`":`~
62                         `record.`~t.name~`=cast(string)(contents);`~
63                       `break;`.dup;
64           else
65             result ~= `case "`~t.abbr~`":`~
66                         `record.`~t.name~`=to!(`~t.FieldType.stringof~`)(contents);`~
67                       `break;`.dup;
68         }
69         result ~= `default: break;`.dup;
70         return cast(string)result;
71     }
72 
73     auto fields(string header_line) {
74         return splitter(header_line[3..$], '\t');
75     }
76 
77     /*
78         generates 'parse' method which parses given string and
79         fills corresponding struct fields
80      */
81     mixin template parseStaticMethod(string struct_name, Field...) {
82 
83         static auto parse(string line) {
84             mixin(struct_name ~ " record;");
85             foreach (field; fields(line)) {
86                 if (field.length < 3) {
87                     continue;
88                 }
89                 if (field[2] != ':') {
90                     continue;
91                 }
92                 string contents = field[3..$];
93                 switch (field[0..2]) {
94                     mixin(makeSwitchStatements!Field());
95                 }
96             }
97             return record;
98         }
99     }
100 
101     string serializeFields(Field...)() {
102         static if (Field.length > 0) {
103             char[] str = `if (`~Field[0].name~` != `~Field[0].FieldType.stringof~`.init) {`.dup;
104             str ~= `sink.write("\t` ~ Field[0].abbr ~ `:");`.dup;
105             str ~= `sink.write(`~Field[0].name~`);`.dup;
106             str ~= `}`.dup;
107             return str.idup ~ serializeFields!(Field[1..$])();
108         } else {
109             return "";
110         }
111     }
112 
113     /*
114         generates 'toSam' method which converts a struct
115         to SAM header line
116      */
117     mixin template toSamMethod(string line_prefix, Field...) {
118         void toSam(Sink)(auto ref Sink sink) const if (isSomeSink!Sink) {
119             sink.write(line_prefix);
120             mixin(serializeFields!Field());
121         }
122     }
123 
124     string generateHashExpression(Field...)() {
125         char[] res;
126         foreach (t; Field) {
127             res ~= "result = 31 * result + " ~
128                    "typeid(" ~ t.name ~ ").getHash(&" ~ t.name ~ ");".dup;
129         }
130         return res.idup;
131     }
132 
133     mixin template toHashMethod(string struct_name, string id_field, Field...) {
134         static if (id_field != null) {
135             hash_t toHash() const nothrow @safe{
136                 hash_t result = 1;
137                 mixin(generateHashExpression!Field());
138                 return result;
139             }
140 
141             mixin("int opCmp(const ref " ~ struct_name ~ " other) " ~
142                   "    const pure nothrow @safe" ~
143                   "{" ~
144                   "    return " ~ id_field ~ " < other." ~ id_field ~ " ? -1 : " ~
145                   "           " ~ id_field ~ " > other." ~ id_field ~ " ? 1 : 0;" ~
146                   "}");
147         }
148     }
149 
150     string opEqualsExpression(Field...)() {
151         char[] result = Field[0].name ~ " == other.".dup ~ Field[0].name;
152         foreach (t; Field[1..$]) {
153             result ~= " && " ~ t.name ~ " == other.".dup ~ t.name;
154         }
155         return result.idup;
156     }
157 
158     mixin template opEqualsMethod(string struct_name, Field...) {
159         mixin("bool opEquals(const ref " ~ struct_name ~ " other)" ~
160               "    pure const @safe nothrow" ~
161               "{" ~
162               "    return " ~ opEqualsExpression!Field() ~ ";" ~
163               "}");
164 
165         mixin("bool opEquals(" ~ struct_name ~ " other)" ~
166               "    pure const @safe nothrow" ~
167               "{" ~
168               "    return " ~ opEqualsExpression!Field() ~ ";" ~
169               "}");
170     }
171 
172     mixin template getSetIDMethods(string id_field) {
173         static if (id_field != null) {
174             auto getID() const pure nothrow @safe {
175                 mixin("return " ~ id_field ~";");
176             }
177 
178             mixin("void setID(typeof("~id_field~") id) pure nothrow @safe { " ~ id_field ~ " = id; }");
179         }
180     }
181 
182     string generateToMsgpackMethod(Field...)() {
183         char[] method = "packer.beginMap(" ~ to!string(Field.length) ~ ");".dup;
184         foreach (t; Field) {
185             method ~= "packer.pack(`" ~ t.abbr ~ "`);".dup;
186             method ~= "packer.pack(" ~ t.name ~ ");".dup;
187         }
188         return method.idup;
189     }
190 
191     mixin template toMsgpackMethod(Field...) {
192 
193         void toMsgpack(Packer)(ref Packer packer) const {
194             mixin(generateToMsgpackMethod!Field());
195         }
196     }
197 
198     mixin template HeaderLineStruct(string struct_name,
199                                     string line_prefix,
200                                     string id_field,
201                                     Field...)
202     {
203          mixin(`struct `~struct_name~`{
204                     mixin structFields!Field;
205                     mixin parseStaticMethod!(struct_name, Field);
206                     mixin toSamMethod!(line_prefix, Field);
207                     mixin toHashMethod!(struct_name, id_field, Field);
208                     mixin opEqualsMethod!(struct_name, Field);
209                     mixin getSetIDMethods!id_field;
210                     mixin toMsgpackMethod!Field;
211                 }`);
212     }
213 
214 }
215 
216 mixin HeaderLineStruct!("HdLine", "@HD", null,
217           Field!("format_version", "VN"),
218           Field!("sorting_order", "SO"));
219 
220 mixin HeaderLineStruct!("SqLine", "@SQ", "name",
221           Field!("name", "SN"),
222           Field!("length", "LN", uint),
223           Field!("assembly", "AS"),
224           Field!("md5", "M5"),
225           Field!("species", "SP"),
226           Field!("uri", "UR"),
227           Field!("alternate_haplotype", "AH"));
228 
229 mixin HeaderLineStruct!("RgLine", "@RG", "identifier",
230           Field!("identifier", "ID"),
231           Field!("sequencing_center", "CN"),
232           Field!("description", "DS"),
233           Field!("date", "DT"),
234           Field!("flow_order", "FO"),
235           Field!("key_sequence", "KS"),
236           Field!("library", "LB"),
237           Field!("programs", "PG"),
238           Field!("predicted_insert_size", "PI", int),
239           Field!("platform", "PL"),
240           Field!("platform_unit", "PU"),
241           Field!("sample", "SM"));
242 
243 mixin HeaderLineStruct!("PgLine", "@PG", "identifier",
244           Field!("identifier", "ID"),
245           Field!("name", "PN"),
246           Field!("command_line", "CL"),
247           Field!("previous_program", "PP"),
248           Field!("program_version", "VN")); // version is a keyword in D
249 
250 unittest {
251     import std.algorithm;
252     import std.stdio;
253 
254     // stderr.writeln("Testing @HD line parsing...");
255     auto hd_line = HdLine.parse("@HD\tVN:1.0\tSO:coordinate");
256     assert(hd_line.format_version == "1.0");
257     assert(hd_line.sorting_order == "coordinate");
258 
259     // stderr.writeln("Testing @SQ line parsing...");
260     auto sq_line = SqLine.parse("@SQ\tSN:NC_007605\tLN:171823\tM5:6743bd63b3ff2b5b8985d8933c53290a\tUR:ftp://.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\tAS:NCBI37\tSP:HUMAN");
261     assert(sq_line.name == "NC_007605");
262     assert(sq_line.length == 171823);
263     assert(sq_line.md5 == "6743bd63b3ff2b5b8985d8933c53290a");
264     assert(sq_line.uri.endsWith("hs37d5.fa.gz"));
265     assert(sq_line.assembly == "NCBI37");
266     assert(sq_line.species == "HUMAN");
267 
268     // stderr.writeln("Testing @RG line parsing...");
269     auto rg_line = RgLine.parse("@RG\tID:ERR016155\tLB:HUMgdtRAGDIAAPE\tSM:HG00125\tPI:488\tCN:BGI\tPL:ILLUMINA\tDS:SRP001294");
270     assert(rg_line.identifier == "ERR016155");
271     assert(rg_line.library == "HUMgdtRAGDIAAPE");
272     assert(rg_line.sample == "HG00125");
273     assert(rg_line.predicted_insert_size == 488);
274     assert(rg_line.sequencing_center == "BGI");
275     assert(rg_line.platform == "ILLUMINA");
276     assert(rg_line.description == "SRP001294");
277 
278     // stderr.writeln("Testing @PG line parsing...");
279     auto pg_line = PgLine.parse("@PG\tID:bam_calculate_bq\tPN:samtools\tPP:bam_recalibrate_quality_scores\tVN:0.1.17 (r973:277)\tCL:samtools calmd -Erb $bam_file $reference_fasta > $bq_bam_file");
280     assert(pg_line.identifier == "bam_calculate_bq");
281     assert(pg_line.name == "samtools");
282     assert(pg_line.previous_program == "bam_recalibrate_quality_scores");
283     assert(pg_line.program_version == "0.1.17 (r973:277)");
284     assert(pg_line.command_line.endsWith("$bq_bam_file"));
285 }
286 
287 // workaround for LDC bug #217
288 struct ValueRange(T) {
289     this(T[string] dict, string[] ids) {
290         _dict = dict;
291         _ids = ids;
292     }
293 
294     private {
295         T[string] _dict;
296         string[] _ids;
297     }
298 
299     ref T front() @property { return _dict[_ids[0]]; }
300     ref T back() @property { return _dict[_ids[$-1]]; }
301     bool empty() @property { return _ids.length == 0; }
302     void popFront() { _ids = _ids[1 .. $]; }
303     void popBack() { _ids = _ids[0 .. $ - 1]; }
304     ref T opIndex(size_t i) { return _dict[_ids[i]]; }
305     size_t length() @property { return _ids.length; }
306     ValueRange save() @property { return ValueRange(_dict, _ids[]); }
307 }
308 
309 /// Common class for storing header lines
310 class HeaderLineDictionary(T) {
311 
312     invariant() {
313         assert(_index_to_id.length == _dict.length);
314         assert(_id_to_index.length == _dict.length);
315         /*
316         foreach(id, index; _id_to_index) {
317             assert(_index_to_id[index] == id);
318         }
319         */
320     }
321 
322     ///
323     ref inout(T) opIndex(string id) inout {
324         return _dict[id];
325     }
326 
327     ///
328     void opIndexAssign(T line, string id) {
329         _dict[id] = line;
330     }
331 
332     ///
333     const(T)* opIn_r(string id) const {
334         return id in _dict;
335     }
336 
337     /// Append a line
338     bool add(T line) {
339         auto id = line.getID();
340         if (id !in _dict) {
341             _dict[id] = line;
342             _id_to_index[id] = _index_to_id.length;
343             _index_to_id ~= id;
344             return true;
345         }
346         return false;
347     }
348 
349     /// Remove a line with identifier $(D id).
350     bool remove(string id) {
351         if (id in _dict) {
352             auto old_len = _dict.length;
353 
354             for (auto j = _id_to_index[id]; j < old_len - 1; ++j) {
355                 _index_to_id[j] = _index_to_id[j + 1];
356                 _id_to_index[_index_to_id[j]] = j;
357             }
358 
359             _index_to_id.length = _index_to_id.length - 1;
360 
361             _dict.remove(id);
362             _id_to_index.remove(id);
363 
364             return true;
365         }
366 
367         return false;
368     }
369 
370     ///
371     int opApply(scope int delegate(ref T line) dg) {
372         foreach (size_t i; 0 .. _dict.length) {
373             auto res = dg(_dict[_index_to_id[i]]);
374             if (res != 0) {
375                 return res;
376             }
377         }
378         return 0;
379     }
380 
381     ///
382     int opApply(scope int delegate(T line) dg) const {
383         foreach (size_t i; 0 .. _dict.length) {
384             auto res = dg(_dict[_index_to_id[i]]);
385             if (res != 0) {
386                 return res;
387             }
388         }
389         return 0;
390     }
391 
392     ///
393     int opApply(scope int delegate(ref size_t index, ref T line) dg) {
394         foreach (size_t i; 0 .. _dict.length) {
395             auto res = dg(i, _dict[_index_to_id[i]]);
396             if (res != 0) {
397                 return res;
398             }
399         }
400         return 0;
401     }
402 
403     /// Clear the dictionary
404     void clear() {
405         _dict = null;
406         _id_to_index = null;
407         _index_to_id.length = 0;
408     }
409 
410     static assert(isRandomAccessRange!(ValueRange!T));
411 
412     /// Returns: range of lines
413     ValueRange!T values() @property {
414         return ValueRange!T(_dict, _index_to_id);
415     }
416 
417     /// Returns: number of stored lines
418     size_t length() @property const {
419         return _dict.length;
420     }
421 
422     protected {
423         T[string] _dict;
424         string[] _index_to_id;
425         size_t[string] _id_to_index;
426     }
427 }
428 
429 /// Dictionary of @SQ lines.
430 final class SqLineDictionary : HeaderLineDictionary!SqLine
431 {
432     ///
433     ref inout(SqLine) getSequence(size_t index) inout {
434         return _dict[_index_to_id[index]];
435     }
436 
437     ///
438     int getSequenceIndex(string sequence_name) {
439         size_t* ind = sequence_name in _id_to_index;
440         return (ind is null) ? -1 : cast(int)(*ind);
441     }
442 }
443 
444 /// Dictionary of @RG lines
445 alias HeaderLineDictionary!RgLine RgLineDictionary;
446 
447 /// Dictionary of @PG lines
448 alias HeaderLineDictionary!PgLine PgLineDictionary;
449 
450 /// Represents SAM header
451 class SamHeader {
452 
453     ///
454     enum DEFAULT_FORMAT_VERSION = "1.3";
455 
456     /// Construct empty SAM header
457     this() {
458         sequences = new SqLineDictionary();
459         read_groups = new RgLineDictionary();
460         programs = new PgLineDictionary();
461 
462         format_version = DEFAULT_FORMAT_VERSION;
463     }
464 
465     /// Parse SAM header given in plain text.
466     this(string header_text) {
467         read_groups = new RgLineDictionary();
468         programs = new PgLineDictionary();
469         format_version = DEFAULT_FORMAT_VERSION;
470 
471         import core.memory;
472         core.memory.GC.disable();
473 
474         bool parsed_first_line = false;
475         size_t n_sq_lines = 0;
476 
477         foreach (line; splitter(header_text, '\n')) {
478             if (line.length < 3) {
479                 continue;
480             }
481             if (!parsed_first_line && line[0..3] == "@HD") {
482                 auto header_line = HdLine.parse(line);
483                 if (header_line.sorting_order.length > 0) {
484                     try {
485                         sorting_order = to!SortingOrder(header_line.sorting_order);
486                     } catch (ConvException e) {
487                         sorting_order = SortingOrder.unknown;
488                         // FIXME: should we do that silently?
489                     }
490                 } else {
491                     sorting_order = SortingOrder.unknown;
492                 }
493                 format_version = header_line.format_version;
494             }
495             enforce(line[0] == '@', "Header lines must start with @");
496             switch (line[1]) {
497                 case 'S':
498                     enforce(line[2] == 'Q');
499                     ++n_sq_lines;
500                     break;
501                 case 'R':
502                     enforce(line[2] == 'G');
503                     auto rg_line = RgLine.parse(line);
504                     if (!read_groups.add(rg_line)) {
505                         stderr.writeln("duplicating @RG line ",  rg_line.identifier);
506                     }
507                     break;
508                 case 'P':
509                     enforce(line[2] == 'G');
510                     auto pg_line = PgLine.parse(line);
511                     if (!programs.add(pg_line)) {
512                         stderr.writeln("duplicating @PG line ", pg_line.identifier);
513                     }
514                     break;
515                 case 'H':
516                     enforce(line[2] == 'D');
517                     break;
518                 case 'C':
519                     enforce(line[2] == 'O');
520                     comments ~= line[4..$];
521                     break;
522                 default:
523                     assert(0);
524             }
525 
526             parsed_first_line = true;
527         }
528 
529         if (!parsed_first_line) {
530             format_version = DEFAULT_FORMAT_VERSION;
531         }
532 
533         _header_text = header_text;
534         if (n_sq_lines <= 1000000)
535             _parseSqLines(); // parse immediately for typical files
536 
537         core.memory.GC.enable();
538     }
539 
540     /// Format version
541     string format_version;
542 
543     /// Sorting order
544     SortingOrder sorting_order = SortingOrder.unknown;
545 
546     /// Dictionary of @SQ lines.
547     /// Removal is not allowed, you can only replace the whole dictionary.
548     SqLineDictionary sequences() @property {
549         if (_sequences is null)
550             _parseSqLines();
551         return _sequences;
552     }
553 
554     void sequences(SqLineDictionary dict) @property {
555         _sequences = dict;
556     }
557 
558     private SqLineDictionary _sequences;
559     private string _header_text;
560     private void _parseSqLines() {
561         import core.memory;
562         core.memory.GC.disable();
563 
564         _sequences = new SqLineDictionary();
565 
566         foreach (line; splitter(_header_text, '\n')) {
567             if (line.length < 3)
568                 continue;
569             if (line[0 .. 3] != "@SQ")
570                 continue;
571 
572             auto sq_line = SqLine.parse(line);
573             if (!_sequences.add(sq_line)) {
574                 stderr.writeln("duplicating @SQ line ",  sq_line.name);
575             }
576         }
577 
578         _header_text = null;
579         core.memory.GC.enable();
580     }
581 
582     /// Dictionary of @RG lines
583     RgLineDictionary read_groups;
584 
585     /// Dictionary of @PG lines
586     PgLineDictionary programs;
587 
588     /// Array of @CO lines
589     string[] comments;
590 
591     /// Zero-based index of sequence.
592     /// If such sequence does not exist in the header, returns -1.
593     int getSequenceIndex(string sequence_name) {
594         return sequences.getSequenceIndex(sequence_name);
595     }
596 
597     ///
598     SqLine getSequence(size_t index) {
599         return sequences.getSequence(index);
600     }
601 
602     /// Get header text representation in SAM format (includes trailing '\n')
603     string text() @property {
604         return to!string(this);
605     }
606 
607     /// Header text representation in SAM ("%s") or JSON format ("%j").
608     /// $(BR)
609     /// Includes trailing '\n'.
610     void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) {
611         if (fmt.spec == 's')
612             toSam(sink);
613         else if (fmt.spec == 'j')
614             toJson(sink);
615         else
616             throw new FormatException("unknown format specifier");
617     }
618 
619     void toSam(Sink)(auto ref Sink sink) if (isSomeSink!Sink) {
620         sink.write("@HD\tVN:");
621         sink.write(format_version);
622         if (sorting_order != SortingOrder.unknown) {
623             sink.write("\tSO:");
624             sink.write(to!string(sorting_order));
625         }
626         sink.write('\n');
627 
628         for (size_t i = 0; i < sequences.length; i++) {
629             auto sq_line = getSequence(i);
630             sq_line.toSam(sink);
631             sink.write('\n');
632         }
633 
634         foreach (rg_line; read_groups) {
635             rg_line.toSam(sink);
636             sink.write('\n');
637         }
638 
639         foreach (pg_line; programs) {
640             pg_line.toSam(sink);
641             sink.write('\n');
642         }
643 
644         foreach (comment; comments) {
645             sink.write("@CO\t");
646             sink.write(comment);
647             sink.write('\n');
648         }
649     }
650 
651     void toJson(Sink)(auto ref Sink sink) if (isSomeSink!Sink) {
652         JSONValue[string] result;
653 
654         result["format_version"] = format_version;
655 
656         if (sorting_order != SortingOrder.unknown) {
657           result["sorting_order"] = sorting_order.to!string;
658         }
659 
660         auto tmp = new JSONValue[sequences.length];
661         for (auto i = 0; i < sequences.length; i++) {
662           auto line = getSequence(i);
663           JSONValue[string] sq;
664           sq["sequence_name"] = line.name;
665           sq["sequence_length"] = line.length;
666           sq["assembly"] = line.assembly;
667           sq["md5"] = line.md5;
668           sq["species"] = line.species;
669           sq["uri"] = line.uri;
670           tmp[i].object = sq;
671         }
672         result["sq_lines"] = tmp.dup;
673         tmp = null;
674 
675         auto tmp2 = new JSONValue[read_groups.length];
676         foreach (i, line; read_groups) {
677           JSONValue[string] sq;
678           sq["identifier"] = line.identifier;
679           sq["sequencing_center"] = line.sequencing_center;
680           sq["description"] = line.description;
681           sq["date"] = line.date;
682           sq["flow_order"] = line.flow_order;
683           sq["key_sequence"] = line.key_sequence;
684           sq["library"] = line.library;
685           sq["programs"] = line.programs;
686           sq["predicted_insert_size"] = line.predicted_insert_size;
687           sq["platform"] = line.platform;
688           sq["platform_unit"] = line.platform_unit;
689           sq["sample"] = line.sample;
690           tmp2[i].object = sq;
691         }
692         result["rg_lines"] = tmp2;
693         tmp2 = null;
694 
695         auto tmp3 = new JSONValue[programs.length];
696         foreach (i, line; programs) {
697           JSONValue[string] sq;
698           sq["identifier"] = line.identifier;
699           sq["program_name"] = line.name;
700           sq["command_line"] = line.command_line;
701           sq["previous_program"] = line.previous_program;
702           sq["program_version"] = line.program_version;
703           tmp3[i].object = sq;
704         }
705         result["pg_lines"] = tmp3;
706 
707         JSONValue json;
708         json.object = result;
709         static if (__VERSION__ < 2072)
710             sink.write(toJSON(&json));
711         else
712           sink.write(toJSON(json,true));
713     }
714 
715     /// Packs message in the following format:
716     /// $(BR)
717     /// MsgPack array with elements
718     ///   $(OL
719     ///     $(LI format version - string)
720     ///     $(LI sorting order - string)
721     ///     $(LI @SQ lines - array of dictionaries)
722     ///     $(LI @RG lines - array of dictionaries)
723     ///     $(LI @PG lines - array of dictionaries))
724     /// $(BR)
725     /// Dictionary keys are the same as in SAM format.
726     void toMsgpack(Packer)(ref Packer packer) const {
727         enforce(_sequences !is null, "failed to call msgpack");
728         packer.beginArray(5);
729         packer.pack(format_version);
730         packer.pack(to!string(sorting_order));
731         packer.beginArray(_sequences.length);
732         foreach (sq; _sequences)
733             packer.pack(sq);
734         packer.beginArray(read_groups.length);
735         foreach (rg; read_groups)
736             packer.pack(rg);
737         packer.beginArray(programs.length);
738         foreach (pg; programs)
739             packer.pack(pg);
740     }
741 }
742 
743 /// Sorting order
744 enum SortingOrder {
745     unknown,    ///
746     unsorted,   ///
747     coordinate, ///
748     queryname   ///
749 }
750 
751 unittest {
752     auto header = new SamHeader();
753     import std.stdio;
754     assert(header.text == "@HD\tVN:1.3\n");
755 
756     auto sequence = SqLine("abc", 123123);
757     header.sequences.add(sequence);
758     assert(header.text == "@HD\tVN:1.3\n@SQ\tSN:abc\tLN:123123\n");
759 
760     header.sorting_order = SortingOrder.coordinate;
761     header.format_version = "1.2";
762     assert(header.text == "@HD\tVN:1.2\tSO:coordinate\n@SQ\tSN:abc\tLN:123123\n");
763     assert(header.getSequenceIndex("abc") == 0);
764     assert(header.getSequenceIndex("bcd") == -1);
765 
766     header.sequences.clear();
767     sequence = SqLine("bcd", 678);
768     sequence.uri = "http://lorem.ipsum";
769     header.sequences.add(sequence);
770     header.format_version = "1.4";
771     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:bcd\tLN:678\tUR:http://lorem.ipsum\n");
772 
773     header.sequences.add(SqLine("def", 321));
774     assert(header.getSequenceIndex("abc") == -1);
775     assert(header.getSequenceIndex("bcd") == 0);
776     assert(header.getSequenceIndex("def") == 1);
777 
778     header.sequences.remove("bcd");
779     assert(header.getSequenceIndex("abc") == -1);
780     assert(header.getSequenceIndex("bcd") == -1);
781     assert(header.getSequenceIndex("def") == 0);
782 
783     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:def\tLN:321\n");
784 
785     auto dict = new SqLineDictionary();
786     dict.add(SqLine("yay", 111));
787     dict.add(SqLine("zzz", 222));
788 
789     auto zzz = dict["zzz"];     // TODO: make 'dict["zzz"].uri = ...' work
790     zzz.uri = "ftp://nyan.cat";
791     dict["zzz"] = zzz;
792     header.sequences = dict;
793 
794     assert(header.text ==
795       "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:yay\tLN:111\n@SQ\tSN:zzz\tLN:222\tUR:ftp://nyan.cat\n");
796     assert(header.sequences == dict);
797 
798     header.sequences.remove("yay");
799     header.sequences.remove("zzz");
800     header.comments ~= "this is a comment";
801 
802     assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@CO\tthis is a comment\n");
803 
804     JSONValue[string] result;
805     result["format_version"] = "1.2";
806     assert(to!string(result) == "[\"format_version\":\"1.2\"]", to!string(result));
807 }