/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.downsampling;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringTokenizer;
import net.sf.samtools.SAMReadGroupRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.variant.variantcontext.Allele;

/**
 * Static helpers that pick which reads (or pileup elements) to discard when
 * down-sampling in an allele-aware way, plus a loader for the per-sample
 * contamination-fraction file that drives the down-sampling fraction.
 *
 * <p>All methods are static and keep no state between calls.
 */
public class AlleleBiasedDownsamplingUtils {
    /** Number of per-base bins used when stratifying a base pileup (base indices 0 to 3). */
    private static final int NUM_BASES = 4;

    /**
     * Builds a new pileup from {@code pileup} with roughly
     * {@code downsamplingFraction} of its elements removed, choosing the
     * per-base removal counts via {@link #runSmartDownsampling(int[], int)}.
     *
     * <p>Elements from reduced reads, and elements whose base has no simple
     * base index ({@code -1}), are never candidates for removal; they are still
     * counted in the total used to compute how many elements to remove.
     *
     * @param pileup               the pileup to down-sample
     * @param downsamplingFraction fraction of elements to remove; values
     *                             {@code <= 0} return {@code pileup} itself and
     *                             values {@code >= 1} return an empty pileup
     * @param log                  optional stream that receives one line per
     *                             removed element; may be {@code null}
     * @return the down-sampled pileup, with surviving elements in their
     *         original iteration order
     */
    public static ReadBackedPileup createAlleleBiasedBasePileup(ReadBackedPileup pileup, double downsamplingFraction, PrintStream log) {
        // Special cases: remove nothing, or remove everything.
        if (downsamplingFraction <= 0.0) {
            return pileup;
        }
        if (downsamplingFraction >= 1.0) {
            return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
        }

        // Stratify the eligible elements by base index.
        List<List<PileupElement>> elementsByBase = new ArrayList<List<PileupElement>>(NUM_BASES);
        for (int i = 0; i < NUM_BASES; ++i) {
            elementsByBase.add(new ArrayList<PileupElement>());
        }
        for (PileupElement pe : pileup) {
            if (pe.getRead().isReducedRead()) {
                continue;
            }
            int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
            if (baseIndex == -1) {
                continue;
            }
            elementsByBase.get(baseIndex).add(pe);
        }

        int[] alleleCounts = new int[NUM_BASES];
        for (int i = 0; i < NUM_BASES; ++i) {
            alleleCounts[i] = elementsByBase.get(i).size();
        }

        // The cast truncates, i.e. the number of removals is rounded down.
        int numReadsToRemove = (int) (pileup.getNumberOfElements() * downsamplingFraction);
        int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);

        Set<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
        for (int i = 0; i < NUM_BASES; ++i) {
            List<PileupElement> alleleList = elementsByBase.get(i);
            int excess = alleleList.size() - targetAlleleCounts[i];
            if (excess > 0) {
                readsToRemove.addAll(downsampleElements(alleleList, excess, log));
            }
        }

        // Drop the per-base references early so they can be garbage collected.
        for (List<PileupElement> alleleList : elementsByBase) {
            alleleList.clear();
        }

        // Re-walk the original pileup so the survivors keep their original order.
        ArrayList<PileupElement> readsToKeep = new ArrayList<PileupElement>(pileup.getNumberOfElements() - numReadsToRemove);
        for (PileupElement pe : pileup) {
            if (!readsToRemove.contains(pe)) {
                readsToKeep.add(pe);
            }
        }
        return new ReadBackedPileupImpl(pileup.getLocation(), readsToKeep);
    }

    /**
     * Scores a set of allele counts; lower is better.
     *
     * <p>With the counts sorted ascending, the score is the smaller of
     * {@code (max - secondMax + rest)} and {@code (secondMax + rest)}, where
     * {@code rest} is the sum of all counts below the top two. The first term
     * is zero when the two largest counts are equal and nothing else is
     * present; the second is zero when only one allele has any count.
     *
     * @param alleleCounts per-allele counts; not modified
     * @return the score, or {@code 0} when fewer than two alleles are given
     */
    private static int scoreAlleleCounts(int[] alleleCounts) {
        if (alleleCounts.length < 2) {
            return 0;
        }

        // Sort a copy so the caller's array keeps its allele ordering.
        int[] sortedCounts = alleleCounts.clone();
        Arrays.sort(sortedCounts);

        int maxCount = sortedCounts[sortedCounts.length - 1];
        int nextBestCount = sortedCounts[sortedCounts.length - 2];
        int remainderCount = 0;
        for (int i = 0; i < sortedCounts.length - 2; ++i) {
            remainderCount += sortedCounts[i];
        }
        return Math.min(maxCount - nextBestCount + remainderCount, Math.abs(nextBestCount + remainderCount));
    }

    /**
     * Searches for the per-allele target counts that minimise
     * {@link #scoreAlleleCounts(int[])}.
     *
     * <p>Candidates are: removing all {@code numReadsToRemove} reads from a
     * single allele, or removing {@code numReadsToRemove / 2} (integer
     * division) from each allele of a pair. Counts are clamped at zero. A
     * candidate replaces the current best only when its score is strictly
     * lower, so ties keep the earlier candidate.
     *
     * @param alleleCounts     current per-allele counts; not modified
     * @param numReadsToRemove total number of reads to remove
     * @return the best target counts found; this is {@code alleleCounts}
     *         itself (not a copy) when no candidate improves on the input
     */
    protected static int[] runSmartDownsampling(int[] alleleCounts, int numReadsToRemove) {
        int numAlleles = alleleCounts.length;
        int bestScore = scoreAlleleCounts(alleleCounts);
        int[] bestCounts = alleleCounts;

        int numReadsToRemovePerAllele = numReadsToRemove / 2;
        for (int i = 0; i < numAlleles; ++i) {
            for (int j = i; j < numAlleles; ++j) {
                int[] newCounts = alleleCounts.clone();
                if (i == j) {
                    // Single-allele case uses the full amount so nothing is lost to the halving.
                    newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemove);
                } else {
                    newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemovePerAllele);
                    newCounts[j] = Math.max(0, newCounts[j] - numReadsToRemovePerAllele);
                }

                int score = scoreAlleleCounts(newCounts);
                if (score < bestScore) {
                    bestScore = score;
                    bestCounts = newCounts;
                }
            }
        }
        return bestCounts;
    }

    /**
     * Picks {@code numElementsToRemove} elements of {@code elements} for
     * removal, logging each pick.
     *
     * <p>Indices come from {@code MathUtils.sampleIndicesWithoutReplacement};
     * the picked elements are returned in their original list order.
     *
     * @param elements            candidate elements; not modified
     * @param numElementsToRemove how many elements to pick
     * @param log                 optional stream for per-element log lines; may be {@code null}
     * @return a new list holding the elements chosen for removal
     */
    private static <T> List<T> downsampleElements(List<T> elements, int numElementsToRemove, PrintStream log) {
        ArrayList<T> elementsToRemove = new ArrayList<T>(numElementsToRemove);
        if (numElementsToRemove == 0) {
            return elementsToRemove;
        }

        int pileupSize = elements.size();
        if (numElementsToRemove == pileupSize) {
            // Removing everything: no sampling needed.
            logAllElements(elements, log);
            elementsToRemove.addAll(elements);
            return elementsToRemove;
        }

        // Mark the sampled indices, then sweep in index order to preserve list order.
        BitSet itemsToRemove = new BitSet(pileupSize);
        for (Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove)) {
            itemsToRemove.set(selectedIndex);
        }
        for (int i = 0; i < pileupSize; ++i) {
            if (itemsToRemove.get(i)) {
                T element = elements.get(i);
                logElement(element, log);
                elementsToRemove.add(element);
            }
        }
        return elementsToRemove;
    }

    /**
     * Chooses which reads to discard from an allele-to-reads map so that
     * roughly {@code downsamplingFraction} of all reads are removed, with
     * per-allele removal counts chosen by
     * {@link #runSmartDownsampling(int[], int)}.
     *
     * <p>Reads under {@code Allele.NO_CALL} count towards the total but are
     * never selected for removal.
     *
     * @param alleleReadMap        reads grouped by allele; not modified
     * @param downsamplingFraction fraction of reads to remove; a fraction that
     *                             yields no removals (including any negative
     *                             value) returns an empty list
     * @param log                  optional stream that receives one line per
     *                             removed read; may be {@code null}
     * @return a new list of the reads selected for removal
     */
    public static List<GATKSAMRecord> selectAlleleBiasedReads(Map<Allele, List<GATKSAMRecord>> alleleReadMap, double downsamplingFraction, PrintStream log) {
        int totalReads = 0;
        for (List<GATKSAMRecord> reads : alleleReadMap.values()) {
            totalReads += reads.size();
        }

        // The cast truncates, i.e. the number of removals is rounded down.
        int numReadsToRemove = (int) (totalReads * downsamplingFraction);
        if (numReadsToRemove <= 0) {
            // Nothing to remove. A negative value would otherwise be passed to the
            // ArrayList constructor below, which rejects negative capacities.
            return new ArrayList<GATKSAMRecord>();
        }

        List<Allele> alleles = new ArrayList<Allele>(alleleReadMap.keySet());
        alleles.remove(Allele.NO_CALL);
        int numAlleles = alleles.size();

        int[] alleleCounts = new int[numAlleles];
        for (int i = 0; i < numAlleles; ++i) {
            alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size();
        }
        int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);

        List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove);
        for (int i = 0; i < numAlleles; ++i) {
            List<GATKSAMRecord> alleleBin = alleleReadMap.get(alleles.get(i));
            int excess = alleleBin.size() - targetAlleleCounts[i];
            if (excess > 0) {
                readsToRemove.addAll(downsampleElements(alleleBin, excess, log));
            }
        }
        return readsToRemove;
    }

    /**
     * Logs every element of {@code elements} via {@link #logElement(Object, PrintStream)}.
     *
     * @param elements elements to log
     * @param log      destination stream; nothing happens when {@code null}
     */
    private static <T> void logAllElements(List<T> elements, PrintStream log) {
        if (log == null) {
            return;
        }
        for (T obj : elements) {
            logElement(obj, log);
        }
    }

    /**
     * Writes one tab-separated line (read name, sample, library, platform
     * unit) for the read behind {@code obj}.
     *
     * @param obj either a {@code PileupElement} (its read is used) or a
     *            {@code GATKSAMRecord}; any other type fails the cast
     * @param log destination stream; nothing happens when {@code null}
     */
    private static <T> void logElement(T obj, PrintStream log) {
        if (log == null) {
            return;
        }
        GATKSAMRecord read = obj instanceof PileupElement ? ((PileupElement)obj).getRead() : (GATKSAMRecord)obj;
        GATKSAMReadGroupRecord readGroup = read.getReadGroup();
        log.println(String.format("%s\t%s\t%s\t%s", read.getReadName(), ((SAMReadGroupRecord)readGroup).getSample(), readGroup.getLibrary(), readGroup.getPlatformUnit()));
    }

    /**
     * Reads a two-column, whitespace-separated contamination file
     * (name, fraction) into a map keyed by name.
     *
     * <p>Empty lines are skipped. Entries whose name is not in
     * {@code AvailableSampleIDs} are ignored and reported through
     * {@code logger}; when {@code AvailableSampleIDs} is {@code null} every
     * entry is accepted.
     *
     * @param ContaminationFractionFile    file to read
     * @param defaultContaminationFraction default value handed to the returned {@code DefaultHashMap}
     * @param AvailableSampleIDs           names to accept, or {@code null} to accept all
     * @param logger                       receives informational summaries
     * @return map from accepted name to its contamination fraction
     * @throws UserException.MalformedFile if a line does not have exactly two
     *         columns, a name is duplicated, or the fraction is unparsable,
     *         NaN, or outside {@code [0, 1]}
     * @throws StingException if reading the file fails with an I/O error
     */
    public static DefaultHashMap<String, Double> loadContaminationFile(File ContaminationFractionFile, Double defaultContaminationFraction, Set<String> AvailableSampleIDs, Logger logger) throws StingException {
        DefaultHashMap<String, Double> sampleContamination = new DefaultHashMap<String, Double>(defaultContaminationFraction);
        Set<String> nonSamplesInContaminationFile = new HashSet<String>(sampleContamination.keySet());
        try {
            XReadLines reader = new XReadLines(ContaminationFractionFile, true);
            try {
                for (String line : reader) {
                    if (line.length() == 0) {
                        continue;
                    }

                    StringTokenizer st = new StringTokenizer(line);
                    if (st.countTokens() != 2) {
                        throw new UserException.MalformedFile("Contamination file must have exactly two columns. Offending line:\n" + line);
                    }
                    String sampleName = st.nextToken();
                    String fractionField = st.nextToken();

                    // Defensive only: StringTokenizer does not hand out empty tokens.
                    if (sampleName.length() == 0 || fractionField.length() == 0) {
                        throw new UserException.MalformedFile("Contamination file can not have empty strings in either column. Offending line:\n" + line);
                    }
                    if (sampleContamination.containsKey(sampleName)) {
                        throw new UserException.MalformedFile("Contamination file contains duplicate entries for input name " + sampleName);
                    }

                    Double contamination;
                    try {
                        contamination = Double.valueOf(fractionField);
                    }
                    catch (NumberFormatException e) {
                        throw new UserException.MalformedFile("Contamination file contains unparsable double in the second field. Offending line: " + line);
                    }
                    // Written as a negated in-range test so that NaN, for which every
                    // ordered comparison is false, is rejected too.
                    if (!(contamination >= 0.0 && contamination <= 1.0)) {
                        throw new UserException.MalformedFile("Contamination file contains unacceptable contamination value (must be 0<=x<=1): " + line);
                    }

                    if (AvailableSampleIDs == null || AvailableSampleIDs.contains(sampleName)) {
                        sampleContamination.put(sampleName, contamination);
                    } else {
                        nonSamplesInContaminationFile.add(sampleName);
                    }
                }
            }
            finally {
                // Release the underlying file handle whether or not parsing succeeded.
                reader.close();
            }

            if (sampleContamination.size() > 0) {
                logger.info(String.format("The following samples were found in the Contamination file and will be processed at the contamination level therein: %s", sampleContamination.keySet().toString()));
                if (AvailableSampleIDs != null) {
                    Set<String> samplesNotInContaminationFile = new HashSet<String>(AvailableSampleIDs);
                    samplesNotInContaminationFile.removeAll(sampleContamination.keySet());
                    if (samplesNotInContaminationFile.size() > 0) {
                        logger.info(String.format("The following samples were NOT found in the Contamination file and will be processed at the default contamination level: %s", samplesNotInContaminationFile.toString()));
                    }
                }
            }
            if (nonSamplesInContaminationFile.size() > 0) {
                logger.info(String.format("The following entries were found in the Contamination file but were not SAMPLEIDs. They will be ignored: %s", nonSamplesInContaminationFile.toString()));
            }
            return sampleContamination;
        }
        catch (IOException e) {
            // Chain the cause so the original stack trace is not lost.
            throw new StingException("I/O Error while reading sample-contamination file " + ContaminationFractionFile.getName() + ": " + e.getMessage(), e);
        }
    }
}

