1 /* 2 This file is part of BioD. 3 4 Copyright (C) 2018 Pjotr Prins <pjotr.prins@thebird.nl> 5 */ 6 7 module bio.std.genotype.snp; 8 9 import bio.std.range.splitter; 10 11 import std.algorithm; 12 import std.array; 13 import std.conv; 14 import std.exception; 15 import std.experimental.logger; 16 import std.stdio; 17 18 /* 19 20 Functions around SNPs 21 22 */ 23 24 struct SNP { 25 string name; 26 string chr; 27 long pos; // -1 if NA 28 double cm; // centimorgan, -1 if NA 29 } 30 31 immutable NullSNP = SNP("unknown","NA",-1,-1.0); 32 33 /* 34 Fetch SNP annotations from tab delimited file that looks like name, 35 pos, chr, cM 36 37 rs3668922 111771071 13 65.0648 38 rs13480515 17261714 10 4.72355 39 rs13483034 53249416 17 30.175 40 41 NA values can be used and the last cM column is optional. 42 43 Note: this function should be generalized to return a named tuple, based on a 44 named list 45 */ 46 47 SNP[] fetch_snp_annotations(string filen) { 48 SNP[] list; 49 50 bool[string] names; 51 52 info("Parsing ",filen); 53 File f = File(filen); 54 55 try { 56 foreach(line; f.byLine) { 57 auto fields = array(SimpleSplitConv!(char[])(line)); 58 auto name = to!string(fields[0]); 59 if (name in names) 60 throw new Exception("SNP name "~name~" appears multiple times in "~filen); 61 auto pos = (fields[1] == "NA" ? -1 : to!ulong(fields[1])); 62 auto chr = to!string(fields[2]); 63 double cm = -1.0; 64 if (fields.length > 3) 65 cm = (fields[3] == "NA" ? -1.0 : to!double(fields[3])); 66 auto snp = SNP(name,chr,pos,cm); 67 list ~= snp; 68 } 69 } 70 catch(Exception e) { 71 writeln(e.msg); // need to test and give proper output FIXME 72 throw e; 73 } 74 75 return list; 76 }