1 /*
2     This file is part of BioD.
3 
4     Copyright (C) 2018 Pjotr Prins <pjotr.prins@thebird.nl>
5 */
6 
7 module bio.std.genotype.maf;
8 
9 import std.algorithm;
10 import std.array;
11 import std.conv;
12 import std.stdio;
13 
14 /*
15 
   Functions around multi-allelic frequencies (MAF). Allele frequencies are usually
   given as values in the range 0.0-1.0 or in the range 0.0-2.0.
18 
19 */
20 
21 /*
   Return (multi-allelic) frequencies of values in gs as an associative array. E.g.
23 
24       double[] g2 = [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0];
25 
26    returns
27 
28       double[double] r = [ 0.0: 0.1, 1.0: 0.7, 2.0: 0.2];
29 
30    Note you can use any type, so this will work
31 
32       assert(maf(["AA", "AB", "BB", "AA", "BB" ]) == [ "AA": 0.4, "BB": 0.4, "AB": 0.2]);
33 */
34 
double[T] maf(T)(T[] gs) {
  // First pass: tally how often each distinct value occurs in gs.
  uint[T] counts;
  foreach (value ; gs)
    counts[value] += 1;

  // Second pass: turn every tally into its fraction of the total number of
  // entries. With an empty gs there is nothing to iterate over, so no
  // division takes place and the result stays empty.
  double[T] result;
  foreach (key, occurrences ; counts) {
    result[key] = 1.0 * occurrences / gs.length;
  }
  return result;
}
45 
unittest {
  // List of values between 0.0 and 2.0
  double[] g2 = [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0];
  double[double] r = [ 0.0: 0.1, 1.0: 0.7, 2.0: 0.2];
  // Compare against the expected table declared above rather than repeating the literal.
  assert(maf(g2) == r);
  // List of values between 0.0 and 1.0 (the same genotypes, halved)
  double[] g1 = array(g2.map!(a => a/2.0));
  assert(maf(g1) == [ 0.0: 0.1, 0.5: 0.7, 1.0: 0.2]);
  // Any key type works, e.g. genotype strings
  assert(maf(["AA", "AB", "BB", "AA", "BB" ]) == [ "AA": 0.4, "BB": 0.4, "AB": 0.2]);
  // Empty input yields an empty frequency table
  double[] none;
  assert(maf(none).length == 0);
}