/*
    This file is part of BioD.

    Copyright (C) 2018 Pjotr Prins <pjotr.prins@thebird.nl>
*/

module bio.std.genotype.maf;

import std.algorithm;
import std.array;
import std.conv;
import std.stdio;

/*

  Functions around multi-allelic frequencies (MAF). Allele frequencies are
  usually listed as values in the range 0.0-1.0 or 0.0-2.0.

*/

/**
  Return the (multi-allele) frequencies of the values in gs as an
  associative array that maps each distinct value to its share of the
  input. E.g.

    double[] g2 = [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0];

  returns

    double[double] r = [ 0.0: 0.1, 1.0: 0.7, 2.0: 0.2];

  Note you can use any type that can serve as an associative array key,
  so this will work

    assert(maf(["AA", "AB", "BB", "AA", "BB" ]) == [ "AA": 0.4, "BB": 0.4, "AB": 0.2]);

  Params:
    gs = values to tally

  Returns:
    For every distinct value in gs, the number of occurrences divided by
    gs.length. An empty input yields an empty associative array (no
    division takes place because there are no keys to visit).
*/
double[T] maf(T)(T[] gs) {
  // size_t matches the type of gs.length, so a tally can never wrap
  // around before the array itself runs out of elements.
  size_t[T] counts;
  foreach (g; gs) {
    // Indexing a missing key in an op-assign inserts it with the initial
    // value 0 first.
    counts[g] += 1;
  }

  double[T] freq;
  // Walk the key/value pairs directly: no temporary key array and no
  // second hash lookup per key.
  foreach (key, count; counts) {
    // Multiplying by 1.0 forces floating point division.
    freq[key] = 1.0 * count / gs.length;
  }
  return freq;
}

unittest {
  // List of values between 0.0 and 2.0
  double[] g2 = [1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0];
  assert(maf(g2) == [ 0.0: 0.1, 1.0: 0.7, 2.0: 0.2]);
  // List of values between 0.0 and 1.0
  double[] g1 = array(g2.map!(a => a/2.0));
  assert(maf(g1) == [ 0.0: 0.1, 0.5: 0.7, 1.0: 0.2]);
  // Any key type works, e.g. genotype strings
  assert(maf(["AA", "AB", "BB", "AA", "BB" ]) == [ "AA": 0.4, "BB": 0.4, "AB": 0.2]);
  // Empty input gives an empty result
  double[] none;
  assert(maf(none).length == 0);
}