module bio.std.hts.bam.snpcallers.simple;

import bio.std.hts.bam.pileup;
import bio.core.utils.algo;

import std.algorithm;

struct SimpleCallerSettings {
    int minimum_coverage = 5;
    int minimum_witnesses = 2;
}

SimpleCallerSettings defaultSettings;

bool isSNP(C)(C column, ref SimpleCallerSettings settings)
{
    if (column.coverage < settings.minimum_coverage) {
        return false;
    }

    int[char] bases_count;

    foreach (read; column.reads) {
        if (read.current_base != '-') {
            bases_count[read.current_base] += 1;
        }
    }

    if (bases_count.length == 0) {
        // e.g. all overlapping reads have deletions at this location
        return false;
    }

    auto consensus = argmax!(base => bases_count[base])(bases_count.byKey());

    if (bases_count[consensus] < settings.minimum_witnesses) {
        return false;
    }

    return consensus != column.reference_base;
}

auto findSNPs(R)(R reads, ref SimpleCallerSettings settings=defaultSettings) {
    auto columns = pileupWithReferenceBases(reads);

    return filter!(column => isSNP(column, settings))(filter!"a.coverage > 0"(columns));
}