1 /* 2 This file is part of BioD. 3 Copyright (C) 2012-2016 Artem Tarasov <lomereiter@gmail.com> 4 5 Permission is hereby granted, free of charge, to any person obtaining a 6 copy of this software and associated documentation files (the "Software"), 7 to deal in the Software without restriction, including without limitation 8 the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 and/or sell copies of the Software, and to permit persons to whom the 10 Software is furnished to do so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in 13 all copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 DEALINGS IN THE SOFTWARE. 22 23 */ 24 module bio.std.hts.sam.header; 25 26 import bio.std.hts.thirdparty.msgpack; 27 import bio.core.utils.format; 28 29 import std.algorithm; 30 import std.conv; 31 import std.format; 32 import std.json; 33 import std.exception; 34 import std.array; 35 import std.range; 36 import std.traits; 37 import std.stdio : stderr; 38 39 private { 40 41 struct Field(string _name, string _abbr, T = string) { 42 enum name = _name; 43 enum abbr = _abbr; 44 alias T FieldType; 45 } 46 47 mixin template structFields(T...) { 48 static if (!(T.length == 0)) { 49 mixin(T[0].FieldType.stringof ~ " " ~ T[0].name ~ ";"); 50 mixin structFields!(T[1..$]); 51 } 52 } 53 54 string makeSwitchStatements(F...)() { 55 /* certain assumptions about variable names are being made here, 56 * namely, 'record' and 'contents' 57 */ 58 char[] result; 59 foreach (t; F) { 60 static if (t.FieldType.stringof == "string") 61 result ~= `case "`~t.abbr~`":`~ 62 `record.`~t.name~`=cast(string)(contents);`~ 63 `break;`.dup; 64 else 65 result ~= `case "`~t.abbr~`":`~ 66 `record.`~t.name~`=to!(`~t.FieldType.stringof~`)(contents);`~ 67 `break;`.dup; 68 } 69 result ~= `default: break;`.dup; 70 return cast(string)result; 71 } 72 73 auto fields(string header_line) { 74 return splitter(header_line[3..$], '\t'); 75 } 76 77 /* 78 generates 'parse' method which parses given string and 79 fills corresponding struct fields 80 */ 81 mixin template parseStaticMethod(string struct_name, Field...) { 82 83 static auto parse(string line) { 84 mixin(struct_name ~ " record;"); 85 foreach (field; fields(line)) { 86 if (field.length < 3) { 87 continue; 88 } 89 if (field[2] != ':') { 90 continue; 91 } 92 string contents = field[3..$]; 93 switch (field[0..2]) { 94 mixin(makeSwitchStatements!Field()); 95 } 96 } 97 return record; 98 } 99 } 100 101 string serializeFields(Field...)() { 102 static if (Field.length > 0) { 103 char[] str = `if (`~Field[0].name~` != `~Field[0].FieldType.stringof~`.init) {`.dup; 104 str ~= `sink.write("\t` ~ Field[0].abbr ~ `:");`.dup; 105 str ~= `sink.write(`~Field[0].name~`);`.dup; 106 str ~= `}`.dup; 107 return str.idup ~ serializeFields!(Field[1..$])(); 108 } else { 109 return ""; 110 } 111 } 112 113 /* 114 generates 'toSam' method which converts a struct 115 to SAM header line 116 */ 117 mixin template toSamMethod(string line_prefix, Field...) { 118 void toSam(Sink)(auto ref Sink sink) const if (isSomeSink!Sink) { 119 sink.write(line_prefix); 120 mixin(serializeFields!Field()); 121 } 122 } 123 124 string generateHashExpression(Field...)() { 125 char[] res; 126 foreach (t; Field) { 127 res ~= "result = 31 * result + " ~ 128 "typeid(" ~ t.name ~ ").getHash(&" ~ t.name ~ ");".dup; 129 } 130 return res.idup; 131 } 132 133 mixin template toHashMethod(string struct_name, string id_field, Field...) { 134 static if (id_field != null) { 135 hash_t toHash() const nothrow @safe{ 136 hash_t result = 1; 137 mixin(generateHashExpression!Field()); 138 return result; 139 } 140 141 mixin("int opCmp(const ref " ~ struct_name ~ " other) " ~ 142 " const pure nothrow @safe" ~ 143 "{" ~ 144 " return " ~ id_field ~ " < other." ~ id_field ~ " ? -1 : " ~ 145 " " ~ id_field ~ " > other." ~ id_field ~ " ? 1 : 0;" ~ 146 "}"); 147 } 148 } 149 150 string opEqualsExpression(Field...)() { 151 char[] result = Field[0].name ~ " == other.".dup ~ Field[0].name; 152 foreach (t; Field[1..$]) { 153 result ~= " && " ~ t.name ~ " == other.".dup ~ t.name; 154 } 155 return result.idup; 156 } 157 158 mixin template opEqualsMethod(string struct_name, Field...) { 159 mixin("bool opEquals(const ref " ~ struct_name ~ " other)" ~ 160 " pure const @safe nothrow" ~ 161 "{" ~ 162 " return " ~ opEqualsExpression!Field() ~ ";" ~ 163 "}"); 164 165 mixin("bool opEquals(" ~ struct_name ~ " other)" ~ 166 " pure const @safe nothrow" ~ 167 "{" ~ 168 " return " ~ opEqualsExpression!Field() ~ ";" ~ 169 "}"); 170 } 171 172 mixin template getSetIDMethods(string id_field) { 173 static if (id_field != null) { 174 auto getID() const pure nothrow @safe { 175 mixin("return " ~ id_field ~";"); 176 } 177 178 mixin("void setID(typeof("~id_field~") id) pure nothrow @safe { " ~ id_field ~ " = id; }"); 179 } 180 } 181 182 string generateToMsgpackMethod(Field...)() { 183 char[] method = "packer.beginMap(" ~ to!string(Field.length) ~ ");".dup; 184 foreach (t; Field) { 185 method ~= "packer.pack(`" ~ t.abbr ~ "`);".dup; 186 method ~= "packer.pack(" ~ t.name ~ ");".dup; 187 } 188 return method.idup; 189 } 190 191 mixin template toMsgpackMethod(Field...) { 192 193 void toMsgpack(Packer)(ref Packer packer) const { 194 mixin(generateToMsgpackMethod!Field()); 195 } 196 } 197 198 mixin template HeaderLineStruct(string struct_name, 199 string line_prefix, 200 string id_field, 201 Field...) 202 { 203 mixin(`struct `~struct_name~`{ 204 mixin structFields!Field; 205 mixin parseStaticMethod!(struct_name, Field); 206 mixin toSamMethod!(line_prefix, Field); 207 mixin toHashMethod!(struct_name, id_field, Field); 208 mixin opEqualsMethod!(struct_name, Field); 209 mixin getSetIDMethods!id_field; 210 mixin toMsgpackMethod!Field; 211 }`); 212 } 213 214 } 215 216 mixin HeaderLineStruct!("HdLine", "@HD", null, 217 Field!("format_version", "VN"), 218 Field!("sorting_order", "SO")); 219 220 mixin HeaderLineStruct!("SqLine", "@SQ", "name", 221 Field!("name", "SN"), 222 Field!("length", "LN", uint), 223 Field!("assembly", "AS"), 224 Field!("md5", "M5"), 225 Field!("species", "SP"), 226 Field!("uri", "UR"), 227 Field!("alternate_haplotype", "AH")); 228 229 mixin HeaderLineStruct!("RgLine", "@RG", "identifier", 230 Field!("identifier", "ID"), 231 Field!("sequencing_center", "CN"), 232 Field!("description", "DS"), 233 Field!("date", "DT"), 234 Field!("flow_order", "FO"), 235 Field!("key_sequence", "KS"), 236 Field!("library", "LB"), 237 Field!("programs", "PG"), 238 Field!("predicted_insert_size", "PI", int), 239 Field!("platform", "PL"), 240 Field!("platform_unit", "PU"), 241 Field!("sample", "SM"), 242 Field!("platform_model", "PM")); 243 244 mixin HeaderLineStruct!("PgLine", "@PG", "identifier", 245 Field!("identifier", "ID"), 246 Field!("name", "PN"), 247 Field!("command_line", "CL"), 248 Field!("previous_program", "PP"), 249 Field!("program_version", "VN")); // version is a keyword in D 250 251 unittest { 252 import std.algorithm; 253 import std.stdio; 254 255 // stderr.writeln("Testing @HD line parsing..."); 256 auto hd_line = HdLine.parse("@HD\tVN:1.0\tSO:coordinate"); 257 assert(hd_line.format_version == "1.0"); 258 assert(hd_line.sorting_order == "coordinate"); 259 260 // stderr.writeln("Testing @SQ line parsing..."); 261 auto sq_line = SqLine.parse("@SQ\tSN:NC_007605\tLN:171823\tM5:6743bd63b3ff2b5b8985d8933c53290a\tUR:ftp://.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\tAS:NCBI37\tSP:HUMAN"); 262 assert(sq_line.name == "NC_007605"); 263 assert(sq_line.length == 171823); 264 assert(sq_line.md5 == "6743bd63b3ff2b5b8985d8933c53290a"); 265 assert(sq_line.uri.endsWith("hs37d5.fa.gz")); 266 assert(sq_line.assembly == "NCBI37"); 267 assert(sq_line.species == "HUMAN"); 268 269 // stderr.writeln("Testing @RG line parsing..."); 270 auto rg_line = RgLine.parse("@RG\tID:ERR016155\tLB:HUMgdtRAGDIAAPE\tSM:HG00125\tPI:488\tCN:BGI\tPL:ILLUMINA\tDS:SRP001294"); 271 assert(rg_line.identifier == "ERR016155"); 272 assert(rg_line.library == "HUMgdtRAGDIAAPE"); 273 assert(rg_line.sample == "HG00125"); 274 assert(rg_line.predicted_insert_size == 488); 275 assert(rg_line.sequencing_center == "BGI"); 276 assert(rg_line.platform == "ILLUMINA"); 277 assert(rg_line.description == "SRP001294"); 278 279 // stderr.writeln("Testing @PG line parsing..."); 280 auto pg_line = PgLine.parse("@PG\tID:bam_calculate_bq\tPN:samtools\tPP:bam_recalibrate_quality_scores\tVN:0.1.17 (r973:277)\tCL:samtools calmd -Erb $bam_file $reference_fasta > $bq_bam_file"); 281 assert(pg_line.identifier == "bam_calculate_bq"); 282 assert(pg_line.name == "samtools"); 283 assert(pg_line.previous_program == "bam_recalibrate_quality_scores"); 284 assert(pg_line.program_version == "0.1.17 (r973:277)"); 285 assert(pg_line.command_line.endsWith("$bq_bam_file")); 286 } 287 288 // workaround for LDC bug #217 289 struct ValueRange(T) { 290 this(T[string] dict, string[] ids) { 291 _dict = dict; 292 _ids = ids; 293 } 294 295 private { 296 T[string] _dict; 297 string[] _ids; 298 } 299 300 ref T front() @property { return _dict[_ids[0]]; } 301 ref T back() @property { return _dict[_ids[$-1]]; } 302 bool empty() @property { return _ids.length == 0; } 303 void popFront() { _ids = _ids[1 .. $]; } 304 void popBack() { _ids = _ids[0 .. $ - 1]; } 305 ref T opIndex(size_t i) { return _dict[_ids[i]]; } 306 size_t length() @property { return _ids.length; } 307 ValueRange save() @property { return ValueRange(_dict, _ids[]); } 308 } 309 310 /// Common class for storing header lines 311 class HeaderLineDictionary(T) { 312 313 invariant() { 314 assert(_index_to_id.length == _dict.length); 315 assert(_id_to_index.length == _dict.length); 316 /* 317 foreach(id, index; _id_to_index) { 318 assert(_index_to_id[index] == id); 319 } 320 */ 321 } 322 323 /// 324 ref inout(T) opIndex(string id) inout { 325 return _dict[id]; 326 } 327 328 /// 329 void opIndexAssign(T line, string id) { 330 _dict[id] = line; 331 } 332 333 /// 334 const(T)* opIn_r(string id) const { 335 return id in _dict; 336 } 337 338 /// Append a line 339 bool add(T line) { 340 auto id = line.getID(); 341 if (id !in _dict) { 342 _dict[id] = line; 343 _id_to_index[id] = _index_to_id.length; 344 _index_to_id ~= id; 345 return true; 346 } 347 return false; 348 } 349 350 /// Remove a line with identifier $(D id). 351 bool remove(string id) { 352 if (id in _dict) { 353 auto old_len = _dict.length; 354 355 for (auto j = _id_to_index[id]; j < old_len - 1; ++j) { 356 _index_to_id[j] = _index_to_id[j + 1]; 357 _id_to_index[_index_to_id[j]] = j; 358 } 359 360 _index_to_id.length = _index_to_id.length - 1; 361 362 _dict.remove(id); 363 _id_to_index.remove(id); 364 365 return true; 366 } 367 368 return false; 369 } 370 371 /// 372 int opApply(scope int delegate(ref T line) dg) { 373 foreach (size_t i; 0 .. _dict.length) { 374 auto res = dg(_dict[_index_to_id[i]]); 375 if (res != 0) { 376 return res; 377 } 378 } 379 return 0; 380 } 381 382 /// 383 int opApply(scope int delegate(T line) dg) const { 384 foreach (size_t i; 0 .. _dict.length) { 385 auto res = dg(_dict[_index_to_id[i]]); 386 if (res != 0) { 387 return res; 388 } 389 } 390 return 0; 391 } 392 393 /// 394 int opApply(scope int delegate(ref size_t index, ref T line) dg) { 395 foreach (size_t i; 0 .. _dict.length) { 396 auto res = dg(i, _dict[_index_to_id[i]]); 397 if (res != 0) { 398 return res; 399 } 400 } 401 return 0; 402 } 403 404 /// Clear the dictionary 405 void clear() { 406 _dict = null; 407 _id_to_index = null; 408 _index_to_id.length = 0; 409 } 410 411 static assert(isRandomAccessRange!(ValueRange!T)); 412 413 /// Returns: range of lines 414 ValueRange!T values() @property { 415 return ValueRange!T(_dict, _index_to_id); 416 } 417 418 /// Returns: number of stored lines 419 size_t length() @property const { 420 return _dict.length; 421 } 422 423 protected { 424 T[string] _dict; 425 string[] _index_to_id; 426 size_t[string] _id_to_index; 427 } 428 } 429 430 /// Dictionary of @SQ lines. 431 final class SqLineDictionary : HeaderLineDictionary!SqLine 432 { 433 /// 434 ref inout(SqLine) getSequence(size_t index) inout { 435 return _dict[_index_to_id[index]]; 436 } 437 438 /// 439 int getSequenceIndex(string sequence_name) { 440 size_t* ind = sequence_name in _id_to_index; 441 return (ind is null) ? -1 : cast(int)(*ind); 442 } 443 } 444 445 /// Dictionary of @RG lines 446 alias HeaderLineDictionary!RgLine RgLineDictionary; 447 448 /// Dictionary of @PG lines 449 alias HeaderLineDictionary!PgLine PgLineDictionary; 450 451 /// Represents SAM header 452 class SamHeader { 453 454 /// 455 enum DEFAULT_FORMAT_VERSION = "1.3"; 456 457 /// Construct empty SAM header 458 this() { 459 sequences = new SqLineDictionary(); 460 read_groups = new RgLineDictionary(); 461 programs = new PgLineDictionary(); 462 463 format_version = DEFAULT_FORMAT_VERSION; 464 } 465 466 /// Parse SAM header given in plain text. 467 this(string header_text) { 468 read_groups = new RgLineDictionary(); 469 programs = new PgLineDictionary(); 470 format_version = DEFAULT_FORMAT_VERSION; 471 472 import core.memory; 473 core.memory.GC.disable(); 474 475 bool parsed_first_line = false; 476 size_t n_sq_lines = 0; 477 478 foreach (line; splitter(header_text, '\n')) { 479 if (line.length < 3) { 480 continue; 481 } 482 if (!parsed_first_line && line[0..3] == "@HD") { 483 auto header_line = HdLine.parse(line); 484 if (header_line.sorting_order.length > 0) { 485 try { 486 sorting_order = to!SortingOrder(header_line.sorting_order); 487 } catch (ConvException e) { 488 sorting_order = SortingOrder.unknown; 489 // FIXME: should we do that silently? 490 } 491 } else { 492 sorting_order = SortingOrder.unknown; 493 } 494 format_version = header_line.format_version; 495 } 496 enforce(line[0] == '@', "Header lines must start with @"); 497 switch (line[1]) { 498 case 'S': 499 enforce(line[2] == 'Q'); 500 ++n_sq_lines; 501 break; 502 case 'R': 503 enforce(line[2] == 'G'); 504 auto rg_line = RgLine.parse(line); 505 if (!read_groups.add(rg_line)) { 506 stderr.writeln("duplicating @RG line ", rg_line.identifier); 507 } 508 break; 509 case 'P': 510 enforce(line[2] == 'G'); 511 auto pg_line = PgLine.parse(line); 512 if (!programs.add(pg_line)) { 513 stderr.writeln("duplicating @PG line ", pg_line.identifier); 514 } 515 break; 516 case 'H': 517 enforce(line[2] == 'D'); 518 break; 519 case 'C': 520 enforce(line[2] == 'O'); 521 comments ~= line[4..$]; 522 break; 523 default: 524 assert(0); 525 } 526 527 parsed_first_line = true; 528 } 529 530 if (!parsed_first_line) { 531 format_version = DEFAULT_FORMAT_VERSION; 532 } 533 534 _header_text = header_text; 535 if (n_sq_lines <= 1000000) 536 _parseSqLines(); // parse immediately for typical files 537 538 core.memory.GC.enable(); 539 } 540 541 /// Format version 542 string format_version; 543 544 /// Sorting order 545 SortingOrder sorting_order = SortingOrder.unknown; 546 547 /// Dictionary of @SQ lines. 548 /// Removal is not allowed, you can only replace the whole dictionary. 549 SqLineDictionary sequences() @property { 550 if (_sequences is null) 551 _parseSqLines(); 552 return _sequences; 553 } 554 555 void sequences(SqLineDictionary dict) @property { 556 _sequences = dict; 557 } 558 559 private SqLineDictionary _sequences; 560 private string _header_text; 561 private void _parseSqLines() { 562 import core.memory; 563 core.memory.GC.disable(); 564 565 _sequences = new SqLineDictionary(); 566 567 foreach (line; splitter(_header_text, '\n')) { 568 if (line.length < 3) 569 continue; 570 if (line[0 .. 3] != "@SQ") 571 continue; 572 573 auto sq_line = SqLine.parse(line); 574 if (!_sequences.add(sq_line)) { 575 stderr.writeln("duplicating @SQ line ", sq_line.name); 576 } 577 } 578 579 _header_text = null; 580 core.memory.GC.enable(); 581 } 582 583 /// Dictionary of @RG lines 584 RgLineDictionary read_groups; 585 586 /// Dictionary of @PG lines 587 PgLineDictionary programs; 588 589 /// Array of @CO lines 590 string[] comments; 591 592 /// Zero-based index of sequence. 593 /// If such sequence does not exist in the header, returns -1. 594 int getSequenceIndex(string sequence_name) { 595 return sequences.getSequenceIndex(sequence_name); 596 } 597 598 /// 599 SqLine getSequence(size_t index) { 600 return sequences.getSequence(index); 601 } 602 603 /// Get header text representation in SAM format (includes trailing '\n') 604 string text() @property { 605 return to!string(this); 606 } 607 608 /// Header text representation in SAM ("%s") or JSON format ("%j"). 609 /// $(BR) 610 /// Includes trailing '\n'. 611 void toString(scope void delegate(const(char)[]) sink, FormatSpec!char fmt) { 612 if (fmt.spec == 's') 613 toSam(sink); 614 else if (fmt.spec == 'j') 615 toJson(sink); 616 else 617 throw new FormatException("unknown format specifier"); 618 } 619 620 void toSam(Sink)(auto ref Sink sink) if (isSomeSink!Sink) { 621 sink.write("@HD\tVN:"); 622 sink.write(format_version); 623 if (sorting_order != SortingOrder.unknown) { 624 sink.write("\tSO:"); 625 sink.write(to!string(sorting_order)); 626 } 627 sink.write('\n'); 628 629 for (size_t i = 0; i < sequences.length; i++) { 630 auto sq_line = getSequence(i); 631 sq_line.toSam(sink); 632 sink.write('\n'); 633 } 634 635 foreach (rg_line; read_groups) { 636 rg_line.toSam(sink); 637 sink.write('\n'); 638 } 639 640 foreach (pg_line; programs) { 641 pg_line.toSam(sink); 642 sink.write('\n'); 643 } 644 645 foreach (comment; comments) { 646 sink.write("@CO\t"); 647 sink.write(comment); 648 sink.write('\n'); 649 } 650 } 651 652 void toJson(Sink)(auto ref Sink sink) if (isSomeSink!Sink) { 653 JSONValue[string] result; 654 655 result["format_version"] = format_version; 656 657 if (sorting_order != SortingOrder.unknown) { 658 result["sorting_order"] = sorting_order.to!string; 659 } 660 661 auto tmp = new JSONValue[sequences.length]; 662 for (auto i = 0; i < sequences.length; i++) { 663 auto line = getSequence(i); 664 JSONValue[string] sq; 665 sq["sequence_name"] = line.name; 666 sq["sequence_length"] = line.length; 667 sq["assembly"] = line.assembly; 668 sq["md5"] = line.md5; 669 sq["species"] = line.species; 670 sq["uri"] = line.uri; 671 tmp[i].object = sq; 672 } 673 result["sq_lines"] = tmp.dup; 674 tmp = null; 675 676 auto tmp2 = new JSONValue[read_groups.length]; 677 foreach (i, line; read_groups) { 678 JSONValue[string] sq; 679 sq["identifier"] = line.identifier; 680 sq["sequencing_center"] = line.sequencing_center; 681 sq["description"] = line.description; 682 sq["date"] = line.date; 683 sq["flow_order"] = line.flow_order; 684 sq["key_sequence"] = line.key_sequence; 685 sq["library"] = line.library; 686 sq["programs"] = line.programs; 687 sq["predicted_insert_size"] = line.predicted_insert_size; 688 sq["platform"] = line.platform; 689 sq["platform_unit"] = line.platform_unit; 690 sq["sample"] = line.sample; 691 tmp2[i].object = sq; 692 } 693 result["rg_lines"] = tmp2; 694 tmp2 = null; 695 696 auto tmp3 = new JSONValue[programs.length]; 697 foreach (i, line; programs) { 698 JSONValue[string] sq; 699 sq["identifier"] = line.identifier; 700 sq["program_name"] = line.name; 701 sq["command_line"] = line.command_line; 702 sq["previous_program"] = line.previous_program; 703 sq["program_version"] = line.program_version; 704 tmp3[i].object = sq; 705 } 706 result["pg_lines"] = tmp3; 707 708 JSONValue json; 709 json.object = result; 710 static if (__VERSION__ < 2072) 711 sink.write(toJSON(&json)); 712 else 713 sink.write(toJSON(json,true)); 714 } 715 716 /// Packs message in the following format: 717 /// $(BR) 718 /// MsgPack array with elements 719 /// $(OL 720 /// $(LI format version - string) 721 /// $(LI sorting order - string) 722 /// $(LI @SQ lines - array of dictionaries) 723 /// $(LI @RG lines - array of dictionaries) 724 /// $(LI @PG lines - array of dictionaries)) 725 /// $(BR) 726 /// Dictionary keys are the same as in SAM format. 727 void toMsgpack(Packer)(ref Packer packer) const { 728 enforce(_sequences !is null, "failed to call msgpack"); 729 packer.beginArray(5); 730 packer.pack(format_version); 731 packer.pack(to!string(sorting_order)); 732 packer.beginArray(_sequences.length); 733 foreach (sq; _sequences) 734 packer.pack(sq); 735 packer.beginArray(read_groups.length); 736 foreach (rg; read_groups) 737 packer.pack(rg); 738 packer.beginArray(programs.length); 739 foreach (pg; programs) 740 packer.pack(pg); 741 } 742 } 743 744 /// Sorting order 745 enum SortingOrder { 746 unknown, /// 747 unsorted, /// 748 coordinate, /// 749 queryname /// 750 } 751 752 unittest { 753 auto header = new SamHeader(); 754 import std.stdio; 755 assert(header.text == "@HD\tVN:1.3\n"); 756 757 auto sequence = SqLine("abc", 123123); 758 header.sequences.add(sequence); 759 assert(header.text == "@HD\tVN:1.3\n@SQ\tSN:abc\tLN:123123\n"); 760 761 header.sorting_order = SortingOrder.coordinate; 762 header.format_version = "1.2"; 763 assert(header.text == "@HD\tVN:1.2\tSO:coordinate\n@SQ\tSN:abc\tLN:123123\n"); 764 assert(header.getSequenceIndex("abc") == 0); 765 assert(header.getSequenceIndex("bcd") == -1); 766 767 header.sequences.clear(); 768 sequence = SqLine("bcd", 678); 769 sequence.uri = "http://lorem.ipsum"; 770 header.sequences.add(sequence); 771 header.format_version = "1.4"; 772 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:bcd\tLN:678\tUR:http://lorem.ipsum\n"); 773 774 header.sequences.add(SqLine("def", 321)); 775 assert(header.getSequenceIndex("abc") == -1); 776 assert(header.getSequenceIndex("bcd") == 0); 777 assert(header.getSequenceIndex("def") == 1); 778 779 header.sequences.remove("bcd"); 780 assert(header.getSequenceIndex("abc") == -1); 781 assert(header.getSequenceIndex("bcd") == -1); 782 assert(header.getSequenceIndex("def") == 0); 783 784 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:def\tLN:321\n"); 785 786 auto dict = new SqLineDictionary(); 787 dict.add(SqLine("yay", 111)); 788 dict.add(SqLine("zzz", 222)); 789 790 auto zzz = dict["zzz"]; // TODO: make 'dict["zzz"].uri = ...' work 791 zzz.uri = "ftp://nyan.cat"; 792 dict["zzz"] = zzz; 793 header.sequences = dict; 794 795 assert(header.text == 796 "@HD\tVN:1.4\tSO:coordinate\n@SQ\tSN:yay\tLN:111\n@SQ\tSN:zzz\tLN:222\tUR:ftp://nyan.cat\n"); 797 assert(header.sequences == dict); 798 799 header.sequences.remove("yay"); 800 header.sequences.remove("zzz"); 801 header.comments ~= "this is a comment"; 802 803 assert(header.text == "@HD\tVN:1.4\tSO:coordinate\n@CO\tthis is a comment\n"); 804 805 JSONValue[string] result; 806 result["format_version"] = "1.2"; 807 assert(to!string(result) == "[\"format_version\":\"1.2\"]", to!string(result)); 808 }