1 /* 2 This file is part of BioD. 3 Copyright (C) 2012-2016 Artem Tarasov <lomereiter@gmail.com> 4 5 Permission is hereby granted, free of charge, to any person obtaining a 6 copy of this software and associated documentation files (the "Software"), 7 to deal in the Software without restriction, including without limitation 8 the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 and/or sell copies of the Software, and to permit persons to whom the 10 Software is furnished to do so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in 13 all copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 DEALINGS IN THE SOFTWARE. 22 23 */ 24 module bio.sam.header; 25 26 import bio.bam.thirdparty.msgpack; 27 import bio.core.utils.format; 28 29 import std.algorithm; 30 import std.conv; 31 import std.format; 32 import std.json; 33 import std.exception; 34 import std.array; 35 import std.range; 36 import std.traits; 37 import std.stdio : stderr; 38 39 private { 40 41 struct Field(string _name, string _abbr, T = string) { 42 enum name = _name; 43 enum abbr = _abbr; 44 alias T FieldType; 45 } 46 47 mixin template structFields(T...) { 48 static if (!(T.length == 0)) { 49 mixin(T[0].FieldType.stringof ~ " " ~ T[0].name ~ ";"); 50 mixin structFields!(T[1..$]); 51 } 52 } 53 54 string makeSwitchStatements(F...)() { 55 /* certain assumptions about variable names are being made here, 56 * namely, 'record' and 'contents' 57 */ 58 char[] result; 59 foreach (t; F) { 60 static if (t.FieldType.stringof == "string") 61 result ~= `case "`~t.abbr~`":`~ 62 `record.`~t.name~`=cast(string)(contents);`~ 63 `break;`.dup; 64 else 65 result ~= `case "`~t.abbr~`":`~ 66 `record.`~t.name~`=to!(`~t.FieldType.stringof~`)(contents);`~ 67 `break;`.dup; 68 } 69 result ~= `default: break;`.dup; 70 return cast(string)result; 71 } 72 73 auto fields(string header_line) { 74 return splitter(header_line[3..$], '\t'); 75 } 76 77 /* 78 generates 'parse' method which parses given string and 79 fills corresponding struct fields 80 */ 81 mixin template parseStaticMethod(string struct_name, Field...) { 82 83 static auto parse(string line) { 84 mixin(struct_name ~ " record;"); 85 foreach (field; fields(line)) { 86 if (field.length < 3) { 87 continue; 88 } 89 if (field[2] != ':') { 90 continue; 91 } 92 string contents = field[3..$]; 93 switch (field[0..2]) { 94 mixin(makeSwitchStatements!Field()); 95 } 96 } 97 return record; 98 } 99 } 100 101 string serializeFields(Field...)() { 102 static if (Field.length > 0) { 103 char[] str = `if (`~Field[0].name~` != `~Field[0].FieldType.stringof~`.init) {`.dup; 104 str ~= `sink.write("\t` ~ Field[0].abbr ~ `:");`.dup; 105 str ~= `sink.write(`~Field[0].name~`);`.dup; 106 str ~= `}`.dup; 107 return str.idup ~ serializeFields!(Field[1..$])(); 108 } else { 109 return ""; 110 } 111 } 112 113 /* 114 generates 'toSam' method which converts a struct 115 to SAM header line 116 */ 117 mixin template toSamMethod(string line_prefix, Field...) { 118 void toSam(Sink)(auto ref Sink sink) const if (isSomeSink!Sink) { 119 sink.write(line_prefix); 120 mixin(serializeFields!Field()); 121 } 122 } 123 124 string generateHashExpression(Field...)() { 125 char[] res; 126 foreach (t; Field) { 127 res ~= "result = 31 * result + " ~ 128 "typeid(" ~ t.name ~ ").getHash(&" ~ t.name ~ ");".dup; 129 } 130 return res.idup; 131 } 132 133 mixin template toHashMethod(string struct_name, string id_field, Field...) { 134 static if (id_field != null) { 135 hash_t toHash() const nothrow @safe{ 136 hash_t result = 1; 137 mixin(generateHashExpression!Field()); 138 return result; 139 } 140 141 mixin("int opCmp(const ref " ~ struct_name ~ " other) " ~ 142 " const pure nothrow @safe" ~ 143 "{" ~ 144 " return " ~ id_field ~ " < other." ~ id_field ~ " ? -1 : " ~ 145 " " ~ id_field ~ " > other." ~ id_field ~ " ? 1 : 0;" ~ 146 "}"); 147 } 148 } 149 150 string opEqualsExpression(Field...)() { 151 char[] result = Field[0].name ~ " == other.".dup ~ Field[0].name; 152 foreach (t; Field[1..$]) { 153 result ~= " && " ~ t.name ~ " == other.".dup ~ t.name; 154 } 155 return result.idup; 156 } 157 158 mixin template opEqualsMethod(string struct_name, Field...) { 159 mixin("bool opEquals(const ref " ~ struct_name ~ " other)" ~ 160 " pure const @safe nothrow" ~ 161 "{" ~ 162 " return " ~ opEqualsExpression!Field() ~ ";" ~ 163 "}"); 164 165 mixin("bool opEquals(" ~ struct_name ~ " other)" ~ 166 " pure const @safe nothrow" ~ 167 "{" ~ 168 " return " ~ opEqualsExpression!Field() ~ ";" ~ 169 "}"); 170 } 171 172 mixin template getSetIDMethods(string id_field) { 173 static if (id_field != null) { 174 auto getID() const pure nothrow @safe { 175 mixin("return " ~ id_field ~";"); 176 } 177 178 mixin("void setID(typeof("~id_field~") id) pure nothrow @safe { " ~ id_field ~ " = id; }"); 179 } 180 } 181 182 string generateToMsgpackMethod(Field...)() { 183 char[] method = "packer.beginMap(" ~ to!string(Field.length) ~ ");".dup; 184 foreach (t; Field) { 185 method ~= "packer.pack(`" ~ t.abbr ~ "`);".dup; 186 method ~= "packer.pack(" ~ t.name ~ ");".dup; 187 } 188 return method.idup; 189 } 190 191 mixin template toMsgpackMethod(Field...) { 192 193 void toMsgpack(Packer)(ref Packer packer) const { 194 mixin(generateToMsgpackMethod!Field()); 195 } 196 } 197 198 mixin template HeaderLineStruct(string struct_name, 199 string line_prefix, 200 string id_field, 201 Field...) 202 { 203 mixin(`struct `~struct_name~`{ 204 mixin structFields!Field; 205 mixin parseStaticMethod!(struct_name, Field); 206 mixin toSamMethod!(line_prefix, Field); 207 mixin toHashMethod!(struct_name, id_field, Field); 208 mixin opEqualsMethod!(struct_name, Field); 209 mixin getSetIDMethods!id_field; 210 mixin toMsgpackMethod!Field; 211 }`); 212 } 213 214 } 215 216 mixin HeaderLineStruct!("HdLine", "@HD", null, 217 Field!("format_version", "VN"), 218 Field!("sorting_order", "SO")); 219 220 mixin HeaderLineStruct!("SqLine", "@SQ", "name", 221 Field!("name", "SN"), 222 Field!("length", "LN", uint), 223 Field!("assembly", "AS"), 224 Field!("md5", "M5"), 225 Field!("species", "SP"), 226 Field!("uri", "UR"), 227 Field!("alternate_haplotype", "AH")); 228 229 mixin HeaderLineStruct!("RgLine", "@RG", "identifier", 230 Field!("identifier", "ID"), 231 Field!("sequencing_center", "CN"), 232 Field!("description", "DS"), 233 Field!("date", "DT"), 234 Field!("flow_order", "FO"), 235 Field!("key_sequence", "KS"), 236 Field!("library", "LB"), 237 Field!("programs", "PG"), 238 Field!("predicted_insert_size", "PI", int), 239 Field!("platform", "PL"), 240 Field!("platform_unit", "PU"), 241 Field!("sample", "SM")); 242 243 mixin HeaderLineStruct!("PgLine", "@PG", "identifier", 244 Field!("identifier", "ID"), 245 Field!("name", "PN"), 246 Field!("command_line", "CL"), 247 Field!("previous_program", "PP"), 248 Field!("program_version", "VN")); // version is a keyword in D 249 250 unittest { 251 import std.algorithm; 252 import std.stdio; 253 254 // stderr.writeln("Testing @HD line parsing..."); 255 auto hd_line = HdLine.parse("@HD\tVN:1.0\tSO:coordinate"); 256 assert(hd_line.format_version == "1.0"); 257 assert(hd_line.sorting_order == "coordinate"); 258 259 // stderr.writeln("Testing @SQ line parsing..."); 260 auto sq_line = SqLine.parse("@SQ\tSN:NC_007605\tLN:171823\tM5:6743bd63b3ff2b5b8985d8933c53290a\tUR:ftp://.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\tAS:NCBI37\tSP:HUMAN"); 261 assert(sq_line.name == "NC_007605"); 262 assert(sq_line.length == 171823); 263 assert(sq_line.md5 == "6743bd63b3ff2b5b8985d8933c53290a"); 264 assert(sq_line.uri.endsWith("hs37d5.fa.gz")); 265 assert(sq_line.assembly == "NCBI37"); 266 assert(sq_line.species == "HUMAN"); 267 268 // stderr.writeln("Testing @RG line parsing..."); 269 auto rg_line = RgLine.parse("@RG\tID:ERR016155\tLB:HUMgdtRAGDIAAPE\tSM:HG00125\tPI:488\tCN:BGI\tPL:ILLUMINA\tDS:SRP001294"); 270 assert(rg_line.identifier == "ERR016155"); 271 assert(rg_line.library == "HUMgdtRAGDIAAPE"); 272 assert(rg_line.sample == "HG00125"); 273 assert(rg_line.predicted_insert_size == 488); 274 assert(rg_line.sequencing_center == "BGI"); 275 assert(rg_line.platform == "ILLUMINA"); 276 assert(rg_line.description == "SRP001294"); 277 278 // stderr.writeln("Testing @PG line parsing..."); 279 auto pg_line = PgLine.parse("@PG\tID:bam_calculate_bq\tPN:samtools\tPP:bam_recalibrate_quality_scores\tVN:0.1.17 (r973:277)\tCL:samtools calmd -Erb $bam_file $reference_fasta > $bq_bam_file"); 280 assert(pg_line.identifier == "bam_calculate_bq"); 281 assert(pg_line.name == "samtools"); 282 assert(pg_line.previous_program == "bam_recalibrate_quality_scores"); 283 assert(pg_line.program_version == "0.1.17 (r973:277)"); 284 assert(pg_line.command_line.endsWith("$bq_bam_file")); 285 } 286 287 // workaround for LDC bug #217 288 struct ValueRange(T) { 289 this(T[string] dict, string[] ids) { 290 _dict = dict; 291 _ids = ids; 292 } 293 294 private { 295 T[string] _dict; 296 string[] _ids; 297 } 298 299 ref T front() @property { return _dict[_ids[0]]; } 300 ref T back() @property { return _dict[_ids[$-1]]; } 301 bool empty() @property { return _ids.length == 0; } 302 void popFront() { _ids = _ids[1 .. $]; } 303 void popBack() { _ids = _ids[0 .. $ - 1]; } 304 ref T opIndex(size_t i) { return _dict[_ids[i]]; } 305 size_t length() @property { return _ids.length; } 306 ValueRange save() @property { return ValueRange(_dict, _ids[]); } 307 } 308 309 /// Common class for storing header lines 310 class HeaderLineDictionary(T) { 311 312 invariant() { 313 assert(_index_to_id.length == _dict.length); 314 assert(_id_to_index.length == _dict.length); 315 /* 316 foreach(id, index; _id_to_index) { 317 assert(_index_to_id[index] == id); 318 } 319 */ 320 } 321 322 /// 323 ref inout(T) opIndex(string id) inout { 324 return _dict[id]; 325 } 326 327 /// 328 void opIndexAssign(T line, string id) { 329 _dict[id] = line; 330 } 331 332 /// 333 const(T)* opIn_r(string id) const { 334 return id in _dict; 335 } 336 337 /// Append a line 338 bool add(T line) { 339 auto id = line.getID(); 340 if (id !in _dict) { 341 _dict[id] = line; 342 _id_to_index[id] = _index_to_id.length; 343 _index_to_id ~= id; 344 return true; 345 } 346 return false; 347 } 348 349 /// Remove a line with identifier $(D id). 350 bool remove(string id) { 351 if (id in _dict) { 352 auto old_len = _dict.length; 353 354 for (auto j = _id_to_index[id]; j < old_len - 1; ++j) { 355 _index_to_id[j] = _index_to_id[j + 1]; 356 _id_to_index[_index_to_id[j]] = j; 357 } 358 359 _index_to_id.length = _index_to_id.length - 1; 360 361 _dict.remove(id); 362 _id_to_index.remove(id); 363 364 return true; 365 } 366 367 return false; 368 } 369 370 /// 371 int opApply(scope int delegate(ref T line) dg) { 372 foreach (size_t i; 0 .. _dict.length) { 373 auto res = dg(_dict[_index_to_id[i]]); 374 if (res != 0) { 375 return res; 376 } 377 } 378 return 0; 379 } 380 381 /// 382 int opApply(scope int delegate(T line) dg) const { 383 foreach (size_t i; 0 .. _dict.length) { 384 auto res = dg(_dict[_index_to_id[i]]); 385 if (res != 0) { 386 return res; 387 } 388 } 389 return 0; 390 } 391 392 /// 393 int opApply(scope int delegate(ref size_t index, ref T line) dg) { 394 foreach (size_t i; 0 .. _dict.length) { 395 auto res = dg(i, _dict[_index_to_id[i]]); 396 if (res != 0) { 397 return res; 398 } 399 } 400 return 0; 401 } 402 403 /// Clear the dictionary 404 void clear() { 405 _dict = null; 406 _id_to_index = null; 407 _index_to_id.length = 0; 408 } 409 410 static assert(isRandomAccessRange!(ValueRange!T)); 411 412 /// Returns: range of lines 413 ValueRange!T values() @property { 414 return ValueRange!T(_dict, _index_to_id); 415 } 416 417 /// Returns: number of stored lines 418 size_t length() @property const { 419 return _dict.length; 420 } 421 422 protected { 423 T[string] _dict; 424 string[] _index_to_id; 425 size_t[string] _id_to_index; 426 } 427 } 428 429 /// Dictionary of @SQ lines. 430 final class SqLineDictionary : HeaderLineDictionary!SqLine 431 { 432 /// 433 ref inout(SqLine) getSequence(size_t index) inout { 434 return _dict[_index_to_id[index]]; 435 } 436 437 /// 438 int getSequenceIndex(string sequence_name) { 439 size_t* ind = sequence_name in _id_to_index; 440 return (ind is null) ? -1 : cast(int)(*ind); 441 } 442 } 443 444 /// Dictionary of @RG lines 445 alias HeaderLineDictionary!RgLine RgLineDictionary; 446 447 /// Dictionary of @PG lines 448 alias HeaderLineDictionary!PgLine PgLineDictionary; 449 450 /// Represents SAM header 451 class SamHeader { 452 453 /// 454 enum DEFAULT_FORMAT_VERSION = "1.3"; 455 456 /// Construct empty SAM header 457 this() { 458 sequences = new SqLineDictionary(); 459 read_groups = new RgLineDictionary(); 460 programs = new PgLineDictionary(); 461 462 format_version = DEFAULT_FORMAT_VERSION; 463 } 464 465 /// Parse SAM header given in plain text. 466 this(string header_text) { 467 read_groups = new RgLineDictionary(); 468 programs = new PgLineDictionary(); 469 format_version = DEFAULT_FORMAT_VERSION; 470 471 import core.memory; 472 core.memory.GC.disable(); 473 474 bool parsed_first_line = false; 475 size_t n_sq_lines = 0; 476 477 foreach (line; splitter(header_text, '\n')) { 478 if (line.length < 3) { 479 continue; 480 } 481 if (!parsed_first_line && line[0..3] == "@HD") { 482 auto header_line = HdLine.parse(line); 483 if (header_line.sorting_order.length > 0) { 484 try { 485 sorting_order = to!SortingOrder(header_line.sorting_order); 486 } catch (ConvException e) { 487 sorting_order = SortingOrder.unknown; 488 // FIXME: should we do that silently? 489 } 490 } else { 491 sorting_order = SortingOrder.unknown; 492 } 493 format_version = header_line.format_version; 494 } 495 enforce(line[0] == '@', "Header lines must start with @"); 496 switch (line[1]) { 497 case 'S': 498 enforce(line[2] == 'Q'); 499 ++n_sq_lines; 500 break; 501 case 'R': 502 enforce(line[2] == 'G'); 503 auto rg_line = RgLine.parse(line); 504 if (!read_groups.add(rg_line)) { 505 stderr.writeln("duplicating @RG line ", rg_line.identifier); 506 } 507 break; 508 case 'P': 509 enforce(line[2] == 'G'); 510 auto pg_line = PgLine.parse(line); 511 if (!programs.add(pg_line)) { 512 stderr.writeln("duplicating @PG line ", pg_line.identifier); 513 } 514 break; 515 case 'H': 516 enforce(line[2] == 'D'); 517 break; 518 case 'C': 519 enforce(line[2] == 'O'); 520 comments ~= line[4..$]; 521 break; 522 default: 523 assert(0); 524 } 525 526 parsed_first_line = true; 527 } 528 529 if (!parsed_first_line) { 530 format_version = DEFAULT_FORMAT_VERSION; 531 } 532 533 _header_text = header_text; 534 if (n_sq_lines <= 1000000) 535 _parseSqLines(); // parse immediately for typical files 536 537 core.memory.GC.enable(); 538 } 539 540 /// Format version 541 string format_version; 542 543 /// Sorting order 544 SortingOrder sorting_order = SortingOrder.unknown; 545 546 /// Dictionary of @SQ lines. 547 /// Removal is not allowed, you can only replace the whole dictionary. 548 SqLineDictionary sequences() @property { 549 if (_sequences is null) 550 _parseSqLines(); 551 return _sequences; 552 } 553 554 void sequences(SqLineDictionary dict) @property { 555 _sequences = dict; 556 } 557 558 private SqLineDictionary _sequences; 559 private string _header_text; 560 private void _parseSqLines() { 561 import core.memory; 562 core.memory.GC.disable(); 563 564 _sequences = new SqLineDictionary(); 565 566 foreach (line; splitter(_header_text, '\n')) { 567 if (line.length < 3) 568 continue; 569 if (line[0 .. 3] != "@SQ") 570 continue; 571 572 auto sq_line = SqLine.parse(line); 573 if (!_sequences.add(sq_line)) { 574 stderr.writeln("duplicating @SQ line ", sq_line.name); 575 } 576 } 577 578 _header_text = null; 579 core.memory.GC.enable(); 580 } 581 582 /// Dictionary of @RG lines 583 RgLineDictionary read_groups; 584 585 /// Dictionary of @PG lines 586 PgLineDictionary programs; 587 588 /// Array of @CO lines 589 string[] comments; 590 591 /// Zero-based index of sequence. 592 /// If such sequence does not exist in the header, returns -1. 593 int getSequenceIndex(string sequence_name) { 594 return sequences.getSequenceIndex(sequence_name); 595 } 596 597 /// 598 SqLine getSequence(size_t index) { 599 return sequences.getSequence(index); 600 } 601 602 /// Get header text representation in SAM format (includes trailing '\n') 603 string text() @property { 604 return to!string(this); 605 } 606 607 /// Header text representation in SAM ("%s") or JSON format ("%j"). 608 /// $(BR) 609 /// Includes trailing '\n'. 610 void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) { 611 if (fmt.spec == 's') 612 toSam(sink); 613 else if (fmt.spec == 'j') 614 toJson(sink); 615 else 616 throw new FormatException("unknown format specifier"); 617 } 618 619 void toSam(Sink)(auto ref Sink sink) if (isSomeSink!Sink) { 620 sink.write("@HD\tVN:"); 621 sink.write(format_version); 622 if (sorting_order != SortingOrder.unknown) { 623 sink.write("\tSO:"); 624 sink.write(to!string(sorting_order)); 625 } 626 sink.write('\n'); 627 628 for (size_t i = 0; i < sequences.length; i++) { 629 auto sq_line = getSequence(i); 630 sq_line.toSam(sink); 631 sink.write('\n'); 632 } 633 634 foreach (rg_line; read_groups) { 635 rg_line.toSam(sink); 636 sink.write('\n'); 637 } 638 639 foreach (pg_line; programs) { 640 pg_line.toSam(sink); 641 sink.write('\n'); 642 } 643 644 foreach (comment; comments) { 645 sink.write("@CO\t"); 646 sink.write(comment); 647 sink.write('\n'); 648 } 649 } 650 651 void toJson(Sink)(auto ref Sink sink) if (isSomeSink!Sink) { 652 JSONValue[string] result; 653 654 result["format_version"] = format_version; 655 656 if (sorting_order != SortingOrder.unknown) { 657 result["sorting_order"] = sorting_order.to!string; 658 } 659 660 auto tmp = new JSONValue[sequences.length]; 661 for (auto i = 0; i < sequences.length; i++) { 662 auto line = getSequence(i); 663 JSONValue[string] sq; 664 sq["sequence_name"] = line.name; 665 sq["sequence_length"] = line.length; 666 sq["assembly"] = line.assembly; 667 sq["md5"] = line.md5; 668 sq["species"] = line.species; 669 sq["uri"] = line.uri; 670 tmp[i].object = sq; 671 } 672 result["sq_lines"] = tmp.dup; 673 tmp = null; 674 675 auto tmp2 = new JSONValue[read_groups.length]; 676 foreach (i, line; read_groups) { 677 JSONValue[string] sq; 678 sq["identifier"] = line.identifier; 679 sq["sequencing_center"] = line.sequencing_center; 680 sq["description"] = line.description; 681 sq["date"] = line.date; 682 sq["flow_order"] = line.flow_order; 683 sq["key_sequence"] = line.key_sequence; 684 sq["library"] = line.library; 685 sq["programs"] = line.programs; 686 sq["predicted_insert_size"] = line.predicted_insert_size; 687 sq["platform"] = line.platform; 688 sq["platform_unit"] = line.platform_unit; 689 sq["sample"] = line.sample; 690 tmp2[i].object = sq; 691 } 692 result["rg_lines"] = tmp2; 693 tmp2 = null; 694 695 auto tmp3 = new JSONValue[programs.length]; 696 foreach (i, line; programs) { 697 JSONValue[string] sq; 698 sq["identifier"] = line.identifier; 699 sq["program_name"] = line.name; 700 sq["command_line"] = line.command_line; 701 sq["previous_program"] = line.previous_program; 702 sq["program_version"] = line.program_version; 703 tmp3[i].object = sq; 704 } 705 result["pg_lines"] = tmp3; 706 707 JSONValue json; 708 json.object = result; 709 static if (__VERSION__ < 2072) 710 sink.write(toJSON(&json)); 711 else 712 sink.write(toJSON(json,true)); 713 } 714 715 /// Packs message in the following format: 716 /// $(BR) 717 /// MsgPack array with elements 718 /// $(OL 719 /// $(LI format version - string) 720 /// $(LI sorting order - string) 721 /// $(LI @SQ lines - array of dictionaries) 722 /// $(LI @RG lines - array of dictionaries) 723 /// $(LI @PG lines - array of dictionaries)) 724 /// $(BR) 725 /// Dictionary keys are the same as in SAM format. 726 void toMsgpack(Packer)(ref Packer packer) const { 727 enforce(_sequences !is null, "failed to call msgpack"); 728 packer.beginArray(5); 729 packer.pack(format_version); 730 packer.pack(to!string(sorting_order)); 731 packer.beginArray(_sequences.length); 732 foreach (sq; _sequences) 733 packer.pack(sq); 734 packer.beginArray(read_groups.length); 735 foreach (rg; read_groups) 736 packer.pack(rg); 737 packer.beginArray(programs.length); 738 foreach (pg; programs) 739 packer.pack(pg); 740 } 741 } 742 743 /// Sorting order 744 enum SortingOrder { 745 unknown, /// 746 unsorted, /// 747 coordinate, /// 748 queryname /// 749 } 750 751 unittest { 752 auto header = new SamHeader(); 753 import std.stdio; 754 assert(header.text == "@HD\tVN:1.3\n"); 755 756 auto sequence = SqLine("abc", 123123); 757 header.sequences.add(sequence); 758 assert(header.text == "@HD\tVN:1.3\n@SQ\tSN:abc\tLN:123123\n"); 759 760 header.sorting_order = SortingOrder.coordinate; 761 header.format_version = "1.2"; 762 assert(header.text == "@HD\tVN:1.2\tSO:coordinate\n@SQ\tSN:abc\tLN:123123\n"); 763 assert(header.getSequenceIndex("abc") == 0); 764 assert(header.getSequenceIndex("bcd") == -1); 765 766 header.sequences.clear(); 767 sequence = SqLine("bcd", 678); 768 sequence.uri = "http://lorem.ipsum"; 769 header.sequences.add(sequence); 770 header.format_version = "1.4"; 771 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:bcd\tLN:678\tUR:http://lorem.ipsum\n"); 772 773 header.sequences.add(SqLine("def", 321)); 774 assert(header.getSequenceIndex("abc") == -1); 775 assert(header.getSequenceIndex("bcd") == 0); 776 assert(header.getSequenceIndex("def") == 1); 777 778 header.sequences.remove("bcd"); 779 assert(header.getSequenceIndex("abc") == -1); 780 assert(header.getSequenceIndex("bcd") == -1); 781 assert(header.getSequenceIndex("def") == 0); 782 783 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:def\tLN:321\n"); 784 785 auto dict = new SqLineDictionary(); 786 dict.add(SqLine("yay", 111)); 787 dict.add(SqLine("zzz", 222)); 788 789 auto zzz = dict["zzz"]; // TODO: make 'dict["zzz"].uri = ...' work 790 zzz.uri = "ftp://nyan.cat"; 791 dict["zzz"] = zzz; 792 header.sequences = dict; 793 794 assert(header.text == 795 "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:yay\tLN:111\n@SQ\tSN:zzz\tLN:222\tUR:ftp://nyan.cat\n"); 796 assert(header.sequences == dict); 797 798 header.sequences.remove("yay"); 799 header.sequences.remove("zzz"); 800 header.comments ~= "this is a comment"; 801 802 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@CO\tthis is a comment\n"); 803 804 JSONValue[string] result; 805 result["format_version"] = "1.2"; 806 assert(to!string(result) == "[\"format_version\":\"1.2\"]", to!string(result)); 807 }