/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.variantutils;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.variant.vcf.VCFUtils;

/**
 * Walker that selects a subset of the records (and optionally of the samples) found in the
 * bound variant track and writes the surviving records to {@link #vcfWriter}.
 *
 * <p>Records can be filtered by ID, Mendelian violation, concordance/discordance with a
 * comparison track, allelicity, variant type, indel size, JEXL select expressions, filter
 * status and random down-sampling. When a sample subset is requested the record is cut down
 * to those samples and its chromosome-count and depth annotations are recomputed.
 *
 * <p>{@link #map} returns 1 for every locus that carries at least one record in the variant
 * track; the reduce steps simply sum those values.
 */
@DocumentedGATKFeature(groupName="Variant Evaluation and Manipulation Tools", extraDocs={CommandLineGATK.class})
public class SelectVariants
extends RodWalker<Integer, Integer>
implements TreeReducible<Integer> {
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    @Input(fullName="discordance", shortName="disc", doc="Output variants that were not called in this comparison track", required=false)
    protected RodBinding<VariantContext> discordanceTrack;
    @Input(fullName="concordance", shortName="conc", doc="Output variants that were also called in this comparison track", required=false)
    protected RodBinding<VariantContext> concordanceTrack;
    @Output(doc="File to which variants should be written", required=true)
    protected VariantContextWriter vcfWriter = null;
    @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> sampleNames = new HashSet<String>(0);
    @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
    public Set<String> sampleExpressions;
    @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
    public Set<File> sampleFiles;
    @Argument(fullName="exclude_sample_name", shortName="xl_sn", doc="Exclude genotypes from this sample. Can be specified multiple times", required=false)
    public Set<String> XLsampleNames = new HashSet<String>(0);
    @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
    public Set<File> XLsampleFiles = new HashSet<File>(0);
    @Argument(shortName="select", doc="One or more criteria to use when selecting the data", required=false)
    public ArrayList<String> SELECT_EXPRESSIONS = new ArrayList<String>();
    @Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure", required=false)
    protected boolean EXCLUDE_NON_VARIANTS = false;
    @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
    protected boolean EXCLUDE_FILTERED = false;
    @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
    private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
    @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)", required=false)
    private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
    @Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only", required=false)
    private Boolean MENDELIAN_VIOLATIONS = false;
    @Argument(fullName="mendelianViolationQualThreshold", shortName="mvq", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
    protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0.0;
    @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
    protected double fractionRandom = 0.0;
    @Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
    protected double fractionGenotypes = 0.0;
    @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
    private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
    @Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
    private File rsIDFile = null;
    @Hidden
    @Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
    private boolean fullyDecode = false;
    @Hidden
    @Argument(fullName="forceGenotypesDecode", doc="If true, the incoming VariantContext will have its genotypes forcibly decoded by computing AC across all genotypes.  For efficiency testing only", required=false)
    private boolean forceGenotypesDecode = false;
    @Hidden
    @Argument(fullName="justRead", doc="If true, we won't actually write the output file.  For efficiency testing only", required=false)
    private boolean justRead = false;
    @Argument(doc="indel size select", required=false, fullName="maxIndelSize")
    private int maxIndelSize = Integer.MAX_VALUE;
    @Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.", required=false, fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
    private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;

    /** Variant types that pass the type filter; filled in by {@link #initialize()}. */
    private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
    /** Generated names ("select-0", "select-1", ...) paired with {@link #SELECT_EXPRESSIONS}. */
    private ArrayList<String> selectNames = new ArrayList<String>();
    /** Compiled select expressions; every one must match for a record to be emitted. */
    private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
    /** Samples written to the output header and used for subsetting. */
    private TreeSet<String> samples = new TreeSet<String>();
    /** True when neither an inclusion nor an exclusion list narrowed the sample set. */
    private boolean NO_SAMPLES_SPECIFIED = false;
    private boolean DISCORDANCE_ONLY = false;
    private boolean CONCORDANCE_ONLY = false;
    /** Only created when {@link #MENDELIAN_VIOLATIONS} is set. */
    private MendelianViolation mv;
    private boolean SELECT_RANDOM_FRACTION = false;
    /** Source of randomness for {@link #fractionGenotypes}. */
    private Random randomGenotypes = new Random();
    /** IDs read from {@link #rsIDFile}, or null when no ID filter is active. */
    private Set<String> IDsToKeep = null;
    /** VCF headers of the input variant track, keyed by rod name. */
    private Map<String, VCFHeader> vcfRods;

    /**
     * Resolves the sample set, prepares all filters and writes the output VCF header.
     *
     * @throws UserException if requested samples are absent from the input (and that is not
     *         explicitly allowed), if every included sample was also excluded, or if the
     *         ID file cannot be found
     */
    @Override
    public void initialize() {
        List<String> rodNames = Arrays.asList(this.variantCollection.variants.getName());
        this.vcfRods = GATKVCFUtils.getVCFHeadersFromRods(this.getToolkit(), rodNames);
        TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(this.vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

        this.resolveSamples(vcfSamples);

        // No explicit type restriction means every variant type is accepted.
        if (this.TYPES_TO_INCLUDE.isEmpty()) {
            this.selectedTypes.addAll(Arrays.asList(VariantContext.Type.values()));
        } else {
            this.selectedTypes.addAll(this.TYPES_TO_INCLUDE);
        }

        Set<VCFHeaderLine> headerLines = this.buildHeaderLines();

        for (int i = 0; i < this.SELECT_EXPRESSIONS.size(); ++i) {
            this.selectNames.add(String.format("select-%d", i));
        }
        this.jexls = VariantContextUtils.initializeMatchExps(this.selectNames, this.SELECT_EXPRESSIONS);

        this.DISCORDANCE_ONLY = this.discordanceTrack.isBound();
        if (this.DISCORDANCE_ONLY) {
            logger.info("Selecting only variants discordant with the track: " + this.discordanceTrack.getName());
        }
        this.CONCORDANCE_ONLY = this.concordanceTrack.isBound();
        if (this.CONCORDANCE_ONLY) {
            logger.info("Selecting only variants concordant with the track: " + this.concordanceTrack.getName());
        }

        if (this.MENDELIAN_VIOLATIONS.booleanValue()) {
            this.mv = new MendelianViolation(this.MENDELIAN_VIOLATION_QUAL_THRESHOLD, false, true);
        }

        this.SELECT_RANDOM_FRACTION = this.fractionRandom > 0.0;
        if (this.SELECT_RANDOM_FRACTION) {
            logger.info("Selecting approximately " + 100.0 * this.fractionRandom + "% of the variants at random from the variant track");
        }

        this.loadIDsToKeep();

        this.vcfWriter.writeHeader(new VCFHeader(headerLines, this.samples));
    }

    /**
     * Fills {@link #samples} from the inclusion arguments, validates them against the samples
     * present in the input, applies the exclusion arguments and sets
     * {@link #NO_SAMPLES_SPECIFIED}.
     *
     * @param vcfSamples the samples found in the input variant track
     * @throws UserException if unknown samples were requested without
     *         ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, or if nothing is left after exclusion
     */
    private void resolveSamples(TreeSet<String> vcfSamples) {
        Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(this.sampleFiles);
        Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, this.sampleExpressions);

        // Requested samples that do not exist in the input VCF.
        HashSet<String> commandLineUniqueSamples = new HashSet<String>(samplesFromFile.size() + samplesFromExpressions.size() + this.sampleNames.size());
        commandLineUniqueSamples.addAll(samplesFromFile);
        commandLineUniqueSamples.addAll(samplesFromExpressions);
        commandLineUniqueSamples.addAll(this.sampleNames);
        commandLineUniqueSamples.removeAll(vcfSamples);

        this.samples.addAll(this.sampleNames);
        this.samples.addAll(samplesFromExpressions);
        this.samples.addAll(samplesFromFile);

        logger.debug(Utils.join(",", commandLineUniqueSamples));
        if (!commandLineUniqueSamples.isEmpty()) {
            if (!this.ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES) {
                throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s", "Samples entered on command line (through -sf or -sn) that are not present in the VCF.", "A list of these samples:", Utils.join(",", commandLineUniqueSamples), "To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES"));
            }
            logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored.");
            this.samples.removeAll(commandLineUniqueSamples);
        }

        // Nothing requested explicitly: start from every sample in the input.
        if (this.samples.isEmpty()) {
            this.samples.addAll(vcfSamples);
            this.NO_SAMPLES_SPECIFIED = true;
        }

        // Exclusions are applied after inclusions, so they win.
        Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(this.XLsampleFiles);
        this.samples.removeAll(XLsamplesFromFile);
        this.samples.removeAll(this.XLsampleNames);

        // An exclusion from either source narrows the sample set. The file-based list has to
        // count too, otherwise subsetRecord() would pass records through with all genotypes
        // while the header written by initialize() lists only the non-excluded samples.
        this.NO_SAMPLES_SPECIFIED = this.NO_SAMPLES_SPECIFIED
                && this.XLsampleNames.isEmpty()
                && XLsamplesFromFile.isEmpty();

        if (this.samples.isEmpty() && !this.NO_SAMPLES_SPECIFIED) {
            throw new UserException("All samples requested to be included were also requested to be excluded.");
        }
        if (!this.NO_SAMPLES_SPECIFIED) {
            for (String sample : this.samples) {
                logger.info("Including sample '" + sample + "'");
            }
        }
    }

    /**
     * Builds the output header lines: the merged input headers plus the lines for the
     * annotations this walker may write.
     *
     * @return the header lines for the output VCF
     */
    private Set<VCFHeaderLine> buildHeaderLines() {
        Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(this.vcfRods.values(), true);
        headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            headerLines.add(new VCFInfoHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
            headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
            headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
        }
        headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
        headerLines.add(VCFStandardHeaderLines.getInfoLine("DP"));
        return headerLines;
    }

    /**
     * Reads {@link #rsIDFile} (one ID per line, surrounding whitespace trimmed) into
     * {@link #IDsToKeep}. Does nothing when no ID file was given.
     *
     * @throws UserException.CouldNotReadInputFile if the file does not exist
     */
    private void loadIDsToKeep() {
        if (this.rsIDFile == null) {
            return;
        }
        this.IDsToKeep = new HashSet<String>();
        try {
            for (String line : new XReadLines(this.rsIDFile).readLines()) {
                this.IDsToKeep.add(line.trim());
            }
            logger.info("Selecting only variants with one of " + this.IDsToKeep.size() + " IDs from " + this.rsIDFile);
        }
        catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(this.rsIDFile, (Exception)e);
        }
    }

    /**
     * Applies every configured filter to the records of the variant track at this locus and
     * writes the ones that pass.
     *
     * @param tracker the reference meta data at this locus; may be null
     * @param ref     the reference context (unused)
     * @param context the alignment context, used for its location
     * @return 0 if the tracker is null or there is no record here, 1 otherwise
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        List<VariantContext> vcs = tracker.getValues(this.variantCollection.variants, context.getLocation());
        if (vcs == null || vcs.isEmpty()) {
            return 0;
        }
        for (VariantContext inputVC : vcs) {
            VariantContext vc = inputVC;
            if (this.fullyDecode) {
                vc = vc.fullyDecode(this.vcfRods.get(vc.getSource()), this.getToolkit().lenientVCFProcessing());
            }
            if (this.forceGenotypesDecode) {
                // Called only for its side effect of decoding the genotypes.
                vc.getCalledChrCount();
            }

            if (this.IDsToKeep != null && !this.IDsToKeep.contains(vc.getID())) {
                continue;
            }
            // Skip only this record. The code used to break out of the loop here, which also
            // discarded every remaining record at the locus.
            if (this.MENDELIAN_VIOLATIONS.booleanValue() && this.mv.countViolations(this.getSampleDB().getFamilies(this.samples), vc) < 1) {
                continue;
            }
            if (this.DISCORDANCE_ONLY && !this.isDiscordant(vc, tracker.getValues(this.discordanceTrack, context.getLocation()))) {
                continue;
            }
            if (this.CONCORDANCE_ONLY && !this.isConcordant(vc, tracker.getValues(this.concordanceTrack, context.getLocation()))) {
                continue;
            }
            if (this.alleleRestriction == NumberAlleleRestriction.BIALLELIC && !vc.isBiallelic()) {
                continue;
            }
            if (this.alleleRestriction == NumberAlleleRestriction.MULTIALLELIC && vc.isBiallelic()) {
                continue;
            }
            if (!this.selectedTypes.contains(vc.getType()) || this.badIndelSize(vc)) {
                continue;
            }

            VariantContext sub = this.subsetRecord(vc, this.EXCLUDE_NON_VARIANTS);
            if (this.EXCLUDE_NON_VARIANTS && !sub.isPolymorphicInSamples()) {
                continue;
            }
            if (this.EXCLUDE_FILTERED && sub.isFiltered()) {
                continue;
            }
            if (!this.matchesAllSelectExpressions(sub) || this.justRead) {
                continue;
            }
            // The random draw is made last so it is only consumed for records that would
            // otherwise be written.
            if (this.SELECT_RANDOM_FRACTION && !(GenomeAnalysisEngine.getRandomGenerator().nextDouble() < this.fractionRandom)) {
                continue;
            }
            this.vcfWriter.add(sub);
        }
        return 1;
    }

    /**
     * @param vc the record to test
     * @return true if the record matches every select expression (vacuously true when none
     *         were given)
     */
    private boolean matchesAllSelectExpressions(VariantContext vc) {
        for (VariantContextUtils.JexlVCMatchExp jexl : this.jexls) {
            if (!VariantContextUtils.match(vc, jexl)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @param vc the record to test
     * @return true if any indel length reported by the record exceeds {@link #maxIndelSize};
     *         false if none does or if the record reports no indel lengths (null)
     */
    private boolean badIndelSize(VariantContext vc) {
        List<Integer> lengths = vc.getIndelLengths();
        if (lengths == null) {
            return false;
        }
        for (Integer indelLength : lengths) {
            if (indelLength > this.maxIndelSize) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a record is discordant with the comparison records at the same locus.
     *
     * <p>Without a sample restriction a record is discordant when the comparison track has
     * nothing at the locus. Otherwise it is discordant when at least one selected sample has a
     * variant genotype that no comparison record reproduces.
     *
     * @param vc      the record under test; null is never discordant
     * @param compVCs the comparison records at this locus; may be null
     * @return true if the record is discordant
     */
    private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED) {
            return compVCs == null || compVCs.isEmpty();
        }
        GenotypesContext genotypes = vc.getGenotypes((Set<String>)this.samples);
        for (Genotype g : genotypes) {
            if (!this.sampleHasVariant(g)) {
                continue;
            }
            if (compVCs == null) {
                return true;
            }
            boolean reproduced = false;
            for (VariantContext compVC : compVCs) {
                if (this.haveSameGenotypes(g, compVC.getGenotype(g.getSampleName()))) {
                    reproduced = true;
                    break;
                }
            }
            if (!reproduced) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a record is concordant with the comparison records at the same locus.
     *
     * <p>Without a sample restriction any comparison record at the locus suffices. Otherwise
     * every selected sample present in the record must have the same genotype in at least one
     * comparison record.
     *
     * @param vc      the record under test
     * @param compVCs the comparison records at this locus
     * @return true if the record is concordant; false if either argument is null or there are
     *         no comparison records
     */
    private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
        if (vc == null || compVCs == null || compVCs.isEmpty()) {
            return false;
        }
        if (this.NO_SAMPLES_SPECIFIED) {
            return true;
        }
        // Work on a copy: the set returned by getSampleNames() may be backed by the record's
        // own state, and retainAll() on it would then alter (or fail on) the record itself.
        Set<String> variantSamples = new HashSet<String>(vc.getSampleNames());
        variantSamples.retainAll(this.samples);
        for (String sample : variantSamples) {
            Genotype varG = vc.getGenotype(sample);
            boolean foundSample = false;
            for (VariantContext compVC : compVCs) {
                if (this.haveSameGenotypes(varG, compVC.getGenotype(sample))) {
                    foundSample = true;
                    break;
                }
            }
            if (!foundSample) {
                return false;
            }
        }
        return true;
    }

    /**
     * @param g the genotype to test; may be null
     * @return true if the genotype is non-null, not hom-ref, and either called or filtered
     *         while filtered data is not being excluded
     */
    private boolean sampleHasVariant(Genotype g) {
        if (g == null || g.isHomRef()) {
            return false;
        }
        return g.isCalled() || (g.isFiltered() && !this.EXCLUDE_FILTERED);
    }

    /**
     * Compares two genotypes by allele content, ignoring allele order.
     *
     * @param g1 first genotype; may be null
     * @param g2 second genotype; may be null
     * @return false if either genotype is null, if one is called while the other is filtered,
     *         or if both are filtered while filtered data is excluded; otherwise true exactly
     *         when each allele list contains all alleles of the other
     */
    private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
        if (g1 == null || g2 == null) {
            return false;
        }
        boolean calledVersusFiltered = (g1.isCalled() && g2.isFiltered()) || (g2.isCalled() && g1.isFiltered());
        if (calledVersusFiltered || (g1.isFiltered() && g2.isFiltered() && this.EXCLUDE_FILTERED)) {
            return false;
        }
        List<Allele> a1s = g1.getAlleles();
        List<Allele> a2s = g2.getAlleles();
        return a1s.containsAll(a2s) && a2s.containsAll(a1s);
    }

    /** @return the initial count, 0 */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /**
     * @param value the result of one {@link #map} call
     * @param sum   the running total
     * @return the new running total
     */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /**
     * @param lhs one partial total
     * @param rhs another partial total
     * @return the combined total
     */
    @Override
    public Integer treeReduce(Integer lhs, Integer rhs) {
        return lhs + rhs;
    }

    /**
     * Logs the final count.
     *
     * @param result the summed {@link #map} results
     */
    @Override
    public void onTraversalDone(Integer result) {
        logger.info(result + " records processed.");
    }

    /**
     * Cuts a record down to the selected samples and refreshes its annotations.
     *
     * <p>The record is returned unchanged when no sample subset is in effect. Otherwise PLs and
     * AD are stripped if the subset lost alleles, MLEAC/MLEAF are removed if the sample count
     * changed, and a random {@link #fractionGenotypes} share of genotypes is set to no-call.
     *
     * @param vc                 the record to subset
     * @param excludeNonVariants passed through to {@code subContextFromSamples}
     * @return the subsetted record, or {@code vc} itself when no subsetting applies
     */
    private VariantContext subsetRecord(VariantContext vc, boolean excludeNonVariants) {
        if (this.NO_SAMPLES_SPECIFIED || this.samples.isEmpty()) {
            return vc;
        }
        VariantContext sub = vc.subContextFromSamples(this.samples, excludeNonVariants);
        VariantContextBuilder builder = new VariantContextBuilder(sub);
        GenotypesContext newGC = sub.getGenotypes();
        if (vc.getAlleles().size() != sub.getAlleles().size()) {
            newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
        }
        if (vc.getNSamples() != sub.getNSamples()) {
            builder.rmAttribute("MLEAC");
            builder.rmAttribute("MLEAF");
        }
        if (this.fractionGenotypes > 0.0) {
            List<Allele> noCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
            ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
            for (Genotype genotype : newGC) {
                if (this.randomGenotypes.nextDouble() < this.fractionGenotypes) {
                    genotypes.add(new GenotypeBuilder(genotype).alleles(noCallAlleles).noGQ().make());
                } else {
                    genotypes.add(genotype);
                }
            }
            newGC = GenotypesContext.create(genotypes);
        }
        builder.genotypes(newGC);
        this.addAnnotations(builder, sub);
        return builder.make();
    }

    /**
     * Recomputes chromosome counts and total depth on the builder, optionally preserving the
     * previous AC/AF/AN values under the *_Orig keys. Does nothing when {@link #fullyDecode}
     * is set.
     *
     * @param builder    the builder to annotate
     * @param originalVC the record whose attributes and genotypes are read; the only caller in
     *                   this class passes the already-subsetted record
     */
    private void addAnnotations(VariantContextBuilder builder, VariantContext originalVC) {
        if (this.fullyDecode) {
            return;
        }
        if (this.KEEP_ORIGINAL_CHR_COUNTS) {
            SelectVariants.copyAttributeIfPresent(builder, originalVC, "AC", "AC_Orig");
            SelectVariants.copyAttributeIfPresent(builder, originalVC, "AF", "AF_Orig");
            SelectVariants.copyAttributeIfPresent(builder, originalVC, "AN", "AN_Orig");
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);

        // DP is the summed depth of the unfiltered genotypes that carry one; it is written
        // only if at least one such genotype exists.
        boolean sawDP = false;
        int depth = 0;
        for (Genotype g : originalVC.getGenotypes()) {
            if (g.isFiltered() || !g.hasDP()) {
                continue;
            }
            depth += g.getDP();
            sawDP = true;
        }
        if (sawDP) {
            builder.attribute("DP", depth);
        }
    }

    /**
     * Copies one attribute from a record to the builder under a new key, if the record has it.
     *
     * @param builder the builder receiving the attribute
     * @param vc      the record to read from
     * @param fromKey the attribute key to read
     * @param toKey   the attribute key to write
     */
    private static void copyAttributeIfPresent(VariantContextBuilder builder, VariantContext vc, String fromKey, String toKey) {
        if (vc.hasAttribute(fromKey)) {
            builder.attribute(toKey, vc.getAttribute(fromKey));
        }
    }

    /** Allelicity restriction selectable through the restrictAllelesTo argument. */
    public static enum NumberAlleleRestriction {
        ALL,
        BIALLELIC,
        MULTIALLELIC;

    }
}

