1 /*
2     This file is part of BioD.
3 
4     Copyright (C) 2018 Pjotr Prins <pjotr.prins@thebird.nl>
5 */
6 
7 module bio.std.genotype.snp;
8 
9 import bio.std.range.splitter;
10 
11 import std.algorithm;
12 import std.array;
13 import std.conv;
14 import std.exception;
15 import std.experimental.logger;
16 import std.stdio;
17 
18 /*
19 
20   Functions around SNPs
21 
22 */
23 
24 struct SNP {
25   string name;
26   string chr;
27   long pos;   // -1 if NA
28   double cm;  // centimorgan, -1 if NA
29 }
30 
31 immutable NullSNP = SNP("unknown","NA",-1,-1.0);
32 
33 /*
34   Fetch SNP annotations from tab delimited file that looks like name,
35   pos, chr, cM
36 
37   rs3668922       111771071       13      65.0648
38   rs13480515      17261714        10      4.72355
39   rs13483034      53249416        17      30.175
40 
41   NA values can be used and the last cM column is optional.
42 
43   Note: this function should be generalized to return a named tuple, based on a
44   named list
45 */
46 
47 SNP[] fetch_snp_annotations(string filen) {
48   SNP[] list;
49 
50   bool[string] names;
51 
52   info("Parsing ",filen);
53   File f = File(filen);
54 
55   try {
56     foreach(line; f.byLine) {
57       auto fields = array(SimpleSplitConv!(char[])(line));
58       auto name = to!string(fields[0]);
59       if (name in names)
60         throw new Exception("SNP name "~name~" appears multiple times in "~filen);
61       auto pos = (fields[1] == "NA" ? -1 : to!ulong(fields[1]));
62       auto chr = to!string(fields[2]);
63       double cm = -1.0;
64       if (fields.length > 3)
65         cm = (fields[3] == "NA" ? -1.0 : to!double(fields[3]));
66       auto snp = SNP(name,chr,pos,cm);
67       list ~= snp;
68     }
69   }
70   catch(Exception e) {
71     writeln(e.msg); // need to test and give proper output FIXME
72     throw e;
73   }
74 
75   return list;
76 }