/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.FrequencyModeSelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.GLBasedSampleSelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.GTBasedSampleSelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.KeepAFSpectrumFrequencySelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.NullSampleSelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.SampleSelector;
import org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector.UniformSamplingFrequencySelector;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine;

/**
 * ROD walker that chooses a subset of sites from the input VCF(s) to be used for validation.
 *
 * <p>While traversing, every variant context bound to {@link #variants} at the current locus that
 * passes the type / polymorphic-status / filter checks is reported to a
 * {@link FrequencyModeSelector}. When traversal finishes, that selector is asked for
 * {@code numValidationSites} sites and each returned site is written to the output writer.
 *
 * <p>The per-locus {@code map} result is an {@code Integer} (0 or 1) and {@code reduce} sums them,
 * so the final reduce value is the number of loci at which at least one variant context was seen.
 */
@DocumentedGATKFeature(groupName="Validation Utilities", extraDocs={CommandLineGATK.class})
public class ValidationSiteSelector
extends RodWalker<Integer, Integer> {
    /** Input variant tracks. */
    @Input(fullName="variant", shortName="V", doc="Input VCF file, can be specified multiple times", required=true)
    public List<RodBinding<VariantContext>> variants;
    /** Destination for the selected validation sites. */
    @Output(doc="File to which variants should be written", required=true)
    protected VariantContextWriter vcfWriter = null;
    /** Explicitly named samples to consider. */
    @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> sampleNames = new HashSet<String>(0);
    /** Expressions matched against the sample names found in the input VCF headers. */
    @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
    public Set<String> sampleExpressions;
    /** Files listing samples to consider. */
    @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
    public Set<File> sampleFiles;
    /** Which {@link SampleSelector} implementation to use. */
    @Argument(fullName="sampleMode", shortName="sampleMode", doc="Sample selection mode", required=false)
    private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;
    /** Only read in {@code POLY_BASED_ON_GL} mode, where {@code log10(1 - samplePNonref)} is handed to the selector. */
    @Argument(shortName="samplePNonref", fullName="samplePNonref", doc="GL-based selection mode only: the probability that a site is non-reference in the samples for which to include the site", required=false)
    private double samplePNonref = 0.99;
    /** Number of sites requested from the frequency selector once traversal is done. */
    @Argument(fullName="numValidationSites", shortName="numSites", doc="Number of output validation sites", required=true)
    private int numValidationSites;
    /** When false, sites that had filters applied and are filtered are skipped. */
    @Argument(fullName="includeFilteredSites", shortName="ifs", doc="If true, will include filtered sites in set to choose variants from", required=false)
    private boolean INCLUDE_FILTERED_SITES = false;
    /** Forwarded unchanged to the frequency selector for every logged site. */
    @Argument(fullName="ignoreGenotypes", shortName="ignoreGenotypes", doc="If true, will ignore genotypes in VCF, will take AC,AF from annotations and will make no sample selection", required=false)
    private boolean IGNORE_GENOTYPES = false;
    /** When false, sites that are not polymorphic in their samples are skipped. */
    @Argument(fullName="ignorePolymorphicStatus", shortName="ignorePolymorphicStatus", doc="If true, will ignore polymorphic status in VCF, and will take VCF record directly without pre-selection", required=false)
    private boolean IGNORE_POLYMORPHIC = false;
    /** Bin count passed to {@link KeepAFSpectrumFrequencySelector}; unused in {@code UNIFORM} mode. */
    @Hidden
    @Argument(fullName="numFrequencyBins", shortName="numBins", doc="Number of frequency bins if we're to match AF distribution", required=false)
    private int numFrequencyBins = 20;
    /** Which {@link FrequencyModeSelector} implementation to use. */
    @Argument(fullName="frequencySelectionMode", shortName="freqMode", doc="Allele Frequency selection mode", required=false)
    private AF_COMPUTATION_MODE freqMode = AF_COMPUTATION_MODE.KEEP_AF_SPECTRUM;
    /** Variant types requested by the user; an empty list means "all types". */
    @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
    private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();

    /** Resolved sample set, filled in by {@link #initialize()}. */
    private TreeSet<String> samples = new TreeSet<String>();
    /** Sample selector built in {@link #initialize()} from {@code sampleMode}. */
    SampleSelector sampleSelector = null;
    /** Frequency selector built in {@link #initialize()} from {@code freqMode}. */
    FrequencyModeSelector frequencyModeSelector = null;
    /** Effective variant types accepted by {@code map}, filled in by {@link #initialize()}. */
    private List<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();

    /**
     * Resolves the sample set, builds the sample and frequency selectors, resolves the accepted
     * variant types, and writes the output VCF header.
     */
    @Override
    public void initialize() {
        Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(this.getToolkit());
        TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

        // Union of the three user-facing ways of naming samples.
        Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(this.sampleFiles);
        Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, this.sampleExpressions);
        this.samples.addAll(samplesFromFile);
        this.samples.addAll(samplesFromExpressions);
        this.samples.addAll(this.sampleNames);

        // Nothing requested explicitly: fall back to every sample found in the input headers.
        if (this.samples.isEmpty()) {
            this.samples.addAll(vcfSamples);
        }

        this.sampleSelector = this.getSampleSelectorObject(this.sampleMode, this.samples);
        this.frequencyModeSelector = this.getFrequencyModeSelectorObject(this.freqMode, this.getToolkit().getGenomeLocParser());

        // No explicit type restriction means every VariantContext.Type is accepted.
        if (this.TYPES_TO_INCLUDE.isEmpty()) {
            for (VariantContext.Type type : VariantContext.Type.values()) {
                this.selectedTypes.add(type);
            }
        } else {
            this.selectedTypes.addAll(this.TYPES_TO_INCLUDE);
        }

        // The output header carries only a "source" line; it is constructed without sample names.
        Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
        headerLines.add(new VCFHeaderLine("source", "ValidationSiteSelector"));
        this.vcfWriter.writeHeader(new VCFHeader(headerLines));
    }

    /**
     * Reports every eligible variant context at the current locus to the frequency selector.
     *
     * @param tracker reference-ordered data at this locus; may be {@code null}
     * @param ref     reference context (not used here)
     * @param context alignment context, used only for its location
     * @return 0 when there is no tracker or no variant context at this locus, 1 otherwise
     *         (including when every variant context at the locus was skipped)
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        List<VariantContext> vcs = tracker.getValues(this.variants, context.getLocation());
        if (vcs == null || vcs.isEmpty()) {
            return 0;
        }
        for (VariantContext vc : vcs) {
            if (!this.isEligibleSite(vc)) {
                continue;
            }
            // After initialize(), samples is empty only if neither the user nor the input headers
            // supplied any sample; in that case the sample selector is bypassed.
            boolean passesSampleSelectionCriteria = this.samples.isEmpty() || this.sampleSelector.selectSiteInSamples(vc);
            this.frequencyModeSelector.logCurrentSiteData(vc, passesSampleSelectionCriteria, this.IGNORE_GENOTYPES, this.IGNORE_POLYMORPHIC);
        }
        return 1;
    }

    /**
     * Applies the type, polymorphic-status and filter checks to a single variant context.
     *
     * @param vc the variant context to test
     * @return {@code true} if the site should be reported to the frequency selector
     */
    private boolean isEligibleSite(VariantContext vc) {
        if (!this.selectedTypes.contains(vc.getType())) {
            return false;
        }
        if (!vc.isPolymorphicInSamples() && !this.IGNORE_POLYMORPHIC) {
            return false;
        }
        if (!this.INCLUDE_FILTERED_SITES && vc.filtersWereApplied() && vc.isFiltered()) {
            return false;
        }
        return true;
    }

    /**
     * @return the initial reduce value, 0
     */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /**
     * Adds one {@code map} result to the running total.
     *
     * @param value result of the latest {@code map} call
     * @param sum   running total so far
     * @return {@code value + sum}
     */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /**
     * Asks the frequency selector for {@code numValidationSites} sites, writes each one to the
     * output writer, and logs the final reduce value.
     *
     * @param result the final reduce value
     */
    @Override
    public void onTraversalDone(Integer result) {
        logger.info("Outputting validation sites...");
        List<VariantContext> selectedSites = this.frequencyModeSelector.selectValidationSites(this.numValidationSites);
        for (VariantContext site : selectedSites) {
            this.vcfWriter.add(site);
        }
        logger.info(result + " records processed.");
    }

    /**
     * Builds the {@link SampleSelector} matching the requested mode.
     *
     * @param sampleMode requested sample selection mode
     * @param samples    sample names handed to the selector
     * @return a new selector instance
     * @throws IllegalArgumentException if the mode is not one of the handled constants
     */
    private SampleSelector getSampleSelectorObject(SAMPLE_SELECTION_MODE sampleMode, TreeSet<String> samples) {
        switch (sampleMode) {
            case POLY_BASED_ON_GL:
                // The GL-based selector receives log10(1 - samplePNonref) as its second argument.
                return new GLBasedSampleSelector(samples, Math.log10(1.0 - this.samplePNonref));
            case POLY_BASED_ON_GT:
                return new GTBasedSampleSelector(samples);
            case NONE:
                return new NullSampleSelector(samples);
            default:
                throw new IllegalArgumentException("Unsupported Sample Selection Mode: " + sampleMode);
        }
    }

    /**
     * Builds the {@link FrequencyModeSelector} matching the requested mode.
     *
     * @param freqMode requested allele-frequency selection mode
     * @param parser   genome location parser handed to the selector
     * @return a new selector instance
     * @throws IllegalArgumentException if the mode is not one of the handled constants
     */
    private FrequencyModeSelector getFrequencyModeSelectorObject(AF_COMPUTATION_MODE freqMode, GenomeLocParser parser) {
        switch (freqMode) {
            case KEEP_AF_SPECTRUM:
                return new KeepAFSpectrumFrequencySelector(this.numFrequencyBins, parser);
            case UNIFORM:
                return new UniformSamplingFrequencySelector(parser);
            default:
                throw new IllegalArgumentException("Unexpected Frequency Selection Mode: " + freqMode);
        }
    }

    /** Sample selection modes accepted by the {@code sampleMode} argument. */
    public static enum SAMPLE_SELECTION_MODE {
        NONE,
        POLY_BASED_ON_GT,
        POLY_BASED_ON_GL;

    }

    /** Allele-frequency selection modes accepted by the {@code frequencySelectionMode} argument. */
    public static enum AF_COMPUTATION_MODE {
        KEEP_AF_SPECTRUM,
        UNIFORM;

    }
}

