/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.SortedSet;
import java.util.TreeSet;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Downsample;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.compression.reducereads.MultiSampleCompressor;
import org.broadinstitute.sting.gatk.walkers.compression.reducereads.ReduceReadsStash;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.BySampleSAMFileWriter;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

@DocumentedGATKFeature(groupName="Sequence Data Processing Tools", extraDocs={CommandLineGATK.class})
@PartitionBy(value=PartitionType.CONTIG)
@ReadFilters(value={UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
public class ReduceReads
extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
    // Engine-provided output writer; used directly as writerToUse unless nwayout is set.
    @Output
    private StingSAMFileWriter out = null;
    // The writer every output read is sent to; chosen in initialize().
    private SAMFileWriter writerToUse = null;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Argument(fullName="context_size", shortName="cs", doc="", required=false)
    private int contextSize = 10;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Argument(fullName="minimum_mapping_quality", shortName="minmap", doc="", required=false)
    private int minMappingQuality = 20;
    // Forwarded to the compressor and also used in map() as the threshold for
    // ReadClipper.hardClipLowQualitySoftClips.
    @Argument(fullName="minimum_base_quality_to_consider", shortName="minqual", doc="", required=false)
    private byte minBaseQual = (byte)20;
    // Threshold handed to ReadClipper.hardClipLowQualEnds in map().
    @Argument(fullName="minimum_tail_qualities", shortName="mintail", doc="", required=false)
    private byte minTailQuality = (byte)2;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Argument(fullName="allow_polyploid_reduction", shortName="polyploid", doc="", required=false)
    private boolean USE_POLYPLOID_REDUCTION = false;
    // When false (default), map() calls read.simplify() on every read.
    @Argument(fullName="dont_simplify_reads", shortName="nosimplify", doc="", required=false)
    private boolean DONT_SIMPLIFY_READS = false;
    // When false (default), map() hard-clips adaptor sequence via ReadClipper.hardClipAdaptorSequence.
    @Argument(fullName="dont_hardclip_adaptor_sequences", shortName="noclip_ad", doc="", required=false)
    private boolean DONT_CLIP_ADAPTOR_SEQUENCES = false;
    // When false (default), map() hard-clips low-quality ends using minTailQuality.
    @Argument(fullName="dont_hardclip_low_qual_tails", shortName="noclip_tail", doc="", required=false)
    private boolean DONT_CLIP_LOW_QUAL_TAILS = false;
    // When false (default), map() passes every mapped read through
    // ReadClipper.hardClipLowQualitySoftClips and drops reads that become empty.
    @Argument(fullName="dont_use_softclipped_bases", shortName="no_soft", doc="", required=false)
    private boolean DONT_USE_SOFTCLIPPED_BASES = false;
    // When false (default), outputRead() replaces read names with sequential numbers.
    @Argument(fullName="dont_compress_read_names", shortName="nocmp_names", doc="", required=false)
    private boolean DONT_COMPRESS_READ_NAMES = false;
    // When true and an interval list is present, map() splits reads with hardClipReadToInterval().
    @Argument(fullName="hard_clip_to_interval", shortName="clip_int", doc="", required=false)
    private boolean HARD_CLIP_TO_INTERVAL = false;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Argument(fullName="minimum_alt_proportion_to_trigger_variant", shortName="minvar", doc="", required=false)
    private double minAltProportionToTriggerVariant = 0.05;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    // NOTE(review): the command-line name says "del" while the field says "Indel" — confirm which is intended.
    @Argument(fullName="minimum_del_proportion_to_trigger_variant", shortName="mindel", doc="", required=false)
    private double minIndelProportionToTriggerVariant = 0.05;
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Argument(fullName="downsample_coverage", shortName="ds", doc="", required=false)
    private int downsampleCoverage = 250;
    // When true, initialize() writes through a BySampleSAMFileWriter instead of `out`,
    // and onTraversalDone() closes that writer.
    @Hidden
    @Argument(fullName="nwayout", shortName="nw", doc="", required=false)
    private boolean nwayout = false;
    // 1 = print trace output to stdout in map/reduce/outputRead;
    // 2 = run checkForHighMismatch and checkCigar on every read in outputRead().
    @Hidden
    @Argument(fullName="", shortName="dl", doc="", required=false)
    private int debugLevel = 0;
    // If non-empty, map() prints a marker line for any read whose name contains this string.
    @Hidden
    @Argument(fullName="", shortName="dr", doc="", required=false)
    private String debugRead = "";
    // Forwarded to the MultiSampleCompressor built in reduceInit().
    @Hidden
    @Argument(fullName="downsample_strategy", shortName="dm", doc="", required=false)
    private DownsampleStrategy downsampleStrategy = DownsampleStrategy.Normal;
    // When true, initialize() creates no program record and skips Utils.setupWriter.
    @Hidden
    @Argument(fullName="no_pg_tag", shortName="npt", doc="", required=false)
    private boolean NO_PG_TAG = false;
    // Number of reads seen by outputRead() for which isReducedRead() was true.
    int nCompressedReads = 0;
    // Original read name -> number assigned by compressReadName().
    HashMap<String, Long> readNameHash;
    // Next number compressReadName() will hand out for a previously unseen name.
    Long nextReadNumber = 1L;
    // Intervals taken from the toolkit in initialize(); empty means "whole genome" (see isWholeGenome()).
    // hardClipReadToInterval() replaces this with a tail view as traversal advances.
    SortedSet<GenomeLoc> intervalList;
    // Name passed when creating the program record / setting up the writer.
    public static final String PROGRAM_RECORD_NAME = "GATK ReduceReads";
    // Extension passed to BySampleSAMFileWriter in nwayout mode.
    private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam";

    /**
     * Prepares per-run state: the read-name compression map, the interval list
     * (left empty when the engine has no intervals) and the writer that
     * {@link #outputRead} sends reads to.
     *
     * In {@code nwayout} mode a {@link BySampleSAMFileWriter} is created; otherwise
     * the engine-provided {@code out} writer is used, marked as not pre-sorted, and
     * set up with a program record unless {@code NO_PG_TAG} is set.
     */
    @Override
    public void initialize() {
        super.initialize();
        final GenomeAnalysisEngine toolkit = this.getToolkit();
        this.readNameHash = new HashMap<String, Long>();
        this.intervalList = new TreeSet<GenomeLoc>();
        if (toolkit.getIntervals() != null) {
            this.intervalList.addAll(toolkit.getIntervals());
        }
        if (this.nwayout) {
            // NOTE(review): the positional meaning of these three flags is inferred from
            // the local names left in the decompiled code; all are true, so behaviour is
            // unaffected, but confirm against BySampleSAMFileWriter's constructor.
            final boolean preSorted = true;
            final boolean indexOnTheFly = true;
            final boolean keepRecords = true;
            final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
            final SAMProgramRecord programRecord = this.NO_PG_TAG ? null : Utils.createProgramRecord(toolkit, this, PROGRAM_RECORD_NAME);
            this.writerToUse = new BySampleSAMFileWriter(toolkit, PROGRAM_FILENAME_EXTENSION, sortOrder, preSorted, indexOnTheFly, this.NO_PG_TAG, programRecord, keepRecords);
        } else {
            this.writerToUse = this.out;
            this.out.setPresorted(false);
            if (!this.NO_PG_TAG) {
                Utils.setupWriter(this.out, toolkit, toolkit.getSAMFileHeader(), false, true, this, PROGRAM_RECORD_NAME);
            }
        }
    }

    /**
     * Pre-processes one read before compression.
     *
     * Steps, in order: remember the original alignment start/end as temporary
     * attributes ("OP"/"OE"); hard-clip bases whose soft start is negative or whose
     * soft end exceeds the contig length; optionally simplify the read, hard-clip
     * adaptor sequence and hard-clip low-quality ends; optionally split the read on
     * the interval list; and finally (unless disabled) hard-clip low-quality soft
     * clips.
     *
     * @param ref             reference context, used here only to look up the contig length
     * @param read            the read to process
     * @param metaDataTracker not used by this method
     * @return the non-empty reads derived from {@code read}; may be an empty list
     */
    @Override
    public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
        if (!this.debugRead.isEmpty() && read.getReadName().contains(this.debugRead)) {
            System.out.println("Found debug read!");
        }
        if (this.debugLevel == 1) {
            System.out.printf("\nOriginal: %s %s %d %d\n", read, read.getCigar(), read.getAlignmentStart(), read.getAlignmentEnd());
        }
        // Stash the pre-clipping coordinates; outputRead() turns them into shift tags.
        read.setTemporaryAttribute("OP", read.getAlignmentStart());
        read.setTemporaryAttribute("OE", read.getAlignmentEnd());
        int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength();
        if (read.getSoftStart() < 0) {
            read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart());
        }
        if (read.getSoftEnd() > chromosomeLength) {
            read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1);
        }
        if (!this.DONT_SIMPLIFY_READS) {
            read.simplify();
        }
        if (!this.DONT_CLIP_ADAPTOR_SEQUENCES) {
            read = ReadClipper.hardClipAdaptorSequence(read);
        }
        if (!this.DONT_CLIP_LOW_QUAL_TAILS) {
            read = ReadClipper.hardClipLowQualEnds(read, this.minTailQuality);
        }

        LinkedList<GATKSAMRecord> mappedReads;
        if (!this.isWholeGenome() && this.HARD_CLIP_TO_INTERVAL) {
            mappedReads = this.hardClipReadToInterval(read);
        } else {
            mappedReads = new LinkedList<GATKSAMRecord>();
            // Clipping above can leave nothing behind. reduce() rejects empty reads with
            // an exception, so they must be dropped here in interval mode as well as in
            // whole-genome mode.
            if (!read.isEmpty()) {
                mappedReads.add(read);
            }
        }

        if (!mappedReads.isEmpty() && !this.DONT_USE_SOFTCLIPPED_BASES) {
            LinkedList<GATKSAMRecord> tempList = new LinkedList<GATKSAMRecord>();
            for (GATKSAMRecord mRead : mappedReads) {
                GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualitySoftClips(mRead, this.minBaseQual);
                if (clippedRead.isEmpty()) {
                    continue;
                }
                tempList.add(clippedRead);
            }
            mappedReads = tempList;
        }
        if (this.debugLevel == 1) {
            for (GATKSAMRecord mappedRead : mappedReads) {
                System.out.printf("MAPPED: %s %d %d\n", mappedRead.getCigar(), mappedRead.getAlignmentStart(), mappedRead.getAlignmentEnd());
            }
        }
        return mappedReads;
    }

    /**
     * Builds the initial reduce accumulator: a stash wrapping a
     * {@link MultiSampleCompressor} configured from the toolkit's SAM header and
     * this walker's command-line arguments.
     *
     * @return a new, empty stash
     */
    @Override
    public ReduceReadsStash reduceInit() {
        final SAMFileHeader header = this.getToolkit().getSAMFileHeader();
        final MultiSampleCompressor compressor = new MultiSampleCompressor(
                header,
                this.contextSize,
                this.downsampleCoverage,
                this.minMappingQuality,
                this.minAltProportionToTriggerVariant,
                this.minIndelProportionToTriggerVariant,
                this.minBaseQual,
                this.downsampleStrategy,
                this.USE_POLYPLOID_REDUCTION);
        return new ReduceReadsStash(compressor);
    }

    /**
     * Feeds the reads produced by {@link #map} into the stash/compressor.
     *
     * Only the first element of {@code mappedReads} can be treated as the
     * "original" read (see {@link #isOriginalRead}). For it, everything the stash
     * returns from {@code getAllReadsBefore(read)} plus the read itself is
     * compressed and written out. Every other read is added to the stash for later.
     *
     * @param mappedReads reads returned by {@code map} for one input read
     * @param stash       the running accumulator
     * @return the same {@code stash} instance
     * @throws ReviewedStingException if any read has length zero
     */
    @Override
    public ReduceReadsStash reduce(LinkedList<GATKSAMRecord> mappedReads, ReduceReadsStash stash) {
        if (this.debugLevel == 1) {
            stash.print();
        }
        boolean firstRead = true;
        for (GATKSAMRecord read : mappedReads) {
            final boolean originalRead = firstRead && this.isOriginalRead(mappedReads, read);
            if (read.getReadLength() == 0) {
                throw new ReviewedStingException("Empty read sent to reduce, this should never happen! " + read.getReadName() + " -- " + read.getCigar() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd());
            }
            if (originalRead) {
                // Copy into a private list so the current read is processed after
                // whatever the stash hands back.
                LinkedList<GATKSAMRecord> readsReady = new LinkedList<GATKSAMRecord>();
                readsReady.addAll(stash.getAllReadsBefore(read));
                readsReady.add(read);
                for (GATKSAMRecord readReady : readsReady) {
                    if (this.debugLevel == 1) {
                        System.out.println("REDUCE: " + readReady.getCigar() + " " + readReady.getAlignmentStart() + " " + readReady.getAlignmentEnd());
                    }
                    for (GATKSAMRecord compressedRead : stash.compress(readReady)) {
                        this.outputRead(compressedRead);
                    }
                }
            } else {
                stash.add(read);
            }
            firstRead = false;
        }
        return stash;
    }

    /**
     * Flushes the stash at the end of traversal: every read returned by
     * {@code stash.close()} is written out, and in {@code nwayout} mode the
     * walker-owned writer is closed afterwards.
     *
     * @param stash the final accumulator
     */
    @Override
    public void onTraversalDone(ReduceReadsStash stash) {
        for (GATKSAMRecord remaining : stash.close()) {
            this.outputRead(remaining);
        }
        if (!this.nwayout) {
            // The writer is the engine-provided `out`; this method does not close it.
            return;
        }
        this.writerToUse.close();
    }

    /**
     * Splits a read into the pieces that fall on the intervals in
     * {@code intervalList}, walking the intervals in set order.
     *
     * Side effect: if the still-unclipped read overlapped an interval,
     * {@code intervalList} is replaced by its tail view starting at that interval
     * (inclusive), so later calls start scanning from there.
     *
     * @param read the read to split; the local variable is reassigned to the
     *             remaining tail as pieces are cut off
     * @return the non-empty clipped pieces, in the order they were produced
     * @throws ReviewedStingException if the read/interval relationship is one the
     *         traversal order should make impossible, or the overlap type is unknown
     */
    private LinkedList<GATKSAMRecord> hardClipReadToInterval(GATKSAMRecord read) {
        LinkedList<GATKSAMRecord> clippedReads = new LinkedList<GATKSAMRecord>();
        // Interval at which the next call should resume scanning (null = leave list untouched).
        GenomeLoc intervalOverlapped = null;
        // True until the first clipped piece has been produced; afterwards `read` is a leftover tail.
        boolean originalRead = true;
        if (this.isWholeGenome()) {
            clippedReads.add(read);
        }
        for (GenomeLoc interval : this.intervalList) {
            boolean doneClipping;
            boolean overlap;
            // Nothing left of the read to place on further intervals.
            if (read.isEmpty()) break;
            GATKSAMRecord clippedRead = null;
            switch (ReadUtils.getReadAndIntervalOverlapType(read, interval)) {
                case NO_OVERLAP_RIGHT: {
                    // Only legal for the untouched read: keep scanning later intervals.
                    if (!originalRead) {
                        throw new ReviewedStingException("tail of the read should never NO_OVERLAP_RIGHT the following interval. " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                    }
                    overlap = false;
                    doneClipping = false;
                    break;
                }
                case NO_OVERLAP_HARDCLIPPED_RIGHT: {
                    if (originalRead) {
                        // Counts as an overlap for list-trimming purposes, but contributes an
                        // empty piece (which is never added to the result below).
                        overlap = true;
                        clippedRead = GATKSAMRecord.emptyRead(read);
                    } else {
                        overlap = false;
                    }
                    doneClipping = false;
                    break;
                }
                case NO_OVERLAP_CONTIG: {
                    if (originalRead) {
                        // An untouched read on an earlier contig than the interval is an error.
                        if (read.getReferenceIndex() < interval.getContigIndex()) {
                            throw new ReviewedStingException("read is behind interval list. (contig) " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                        }
                        overlap = false;
                        doneClipping = false;
                        break;
                    }
                    // A leftover tail on an earlier contig than the interval: stop scanning.
                    if (read.getReferenceIndex() < interval.getContigIndex()) {
                        overlap = false;
                        doneClipping = true;
                        break;
                    }
                    throw new ReviewedStingException("Tail read is in bigger contig than interval traversal. " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                }
                case NO_OVERLAP_LEFT: {
                    // Legal only for a leftover tail, in which case scanning stops.
                    if (originalRead) {
                        throw new ReviewedStingException("original read cannot be behind the first interval. (position) " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                    }
                    overlap = false;
                    doneClipping = true;
                    break;
                }
                case NO_OVERLAP_HARDCLIPPED_LEFT: {
                    // No piece is produced; treated as an overlap only for the untouched read.
                    overlap = originalRead;
                    doneClipping = true;
                    break;
                }
                case OVERLAP_LEFT: {
                    // Clip the left tail at interval start - 1; the result is the last piece.
                    clippedRead = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStart() - 1);
                    overlap = true;
                    doneClipping = true;
                    break;
                }
                case OVERLAP_RIGHT: {
                    // Emit the piece obtained by clipping the right tail at stop + 1, then keep
                    // the read with its left tail clipped at stop for the following intervals.
                    clippedRead = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, interval.getStop() + 1);
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStop());
                    overlap = true;
                    doneClipping = false;
                    break;
                }
                case OVERLAP_LEFT_AND_RIGHT: {
                    // Emit the piece clipped on both sides of the interval, and carry on with
                    // the read whose left tail is clipped at stop.
                    clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, interval.getStart() - 1, interval.getStop() + 1);
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStop());
                    overlap = true;
                    doneClipping = false;
                    break;
                }
                case OVERLAP_CONTAINED: {
                    // The read is emitted unchanged and scanning stops.
                    clippedRead = read;
                    overlap = true;
                    doneClipping = true;
                    break;
                }
                default: {
                    throw new ReviewedStingException("interval overlap returned an unknown / unhandled state. If new state was added to intervalOverlap, it should be handled by hardClipReadToInterval.");
                }
            }
            // Must run before originalRead is cleared below: records the interval the
            // untouched read overlapped.
            if (overlap && originalRead) {
                intervalOverlapped = interval;
            }
            if (clippedRead != null) {
                originalRead = false;
                if (!clippedRead.isEmpty()) {
                    clippedReads.add(clippedRead);
                }
            }
            if (!doneClipping) continue;
            break;
        }
        if (intervalOverlapped != null) {
            // tailSet is inclusive of intervalOverlapped and returns a view of the current set.
            this.intervalList = this.intervalList.tailSet(intervalOverlapped);
        }
        return clippedReads;
    }

    /**
     * Writes one read to {@code writerToUse}.
     *
     * At debug level 2 the read is first checked for high mismatch rate and CIGAR
     * validity. Reduced reads are counted; all other reads get "OP"/"OE" attributes
     * holding how far the unclipped start/end moved relative to the original
     * alignment start/end recorded in {@code map()} (only when the shift is
     * positive). Read names are then compressed unless disabled.
     *
     * @param read the read to emit; it is modified in place
     */
    private void outputRead(GATKSAMRecord read) {
        if (this.debugLevel == 2) {
            this.checkForHighMismatch(read);
            this.checkCigar(read);
        }

        if (!read.isReducedRead()) {
            final int recordedStart = ((Integer)read.getTemporaryAttribute("OP")).intValue();
            final int recordedEnd = ((Integer)read.getTemporaryAttribute("OE")).intValue();
            final int shiftAtStart = recordedStart - read.getUnclippedStart();
            final int shiftAtEnd = read.getUnclippedEnd() - recordedEnd;
            if (shiftAtStart > 0) {
                read.setAttribute("OP", Integer.valueOf(shiftAtStart));
            }
            if (shiftAtEnd > 0) {
                read.setAttribute("OE", Integer.valueOf(shiftAtEnd));
            }
        } else {
            this.nCompressedReads++;
        }

        if (this.debugLevel == 1) {
            System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd());
        }
        if (!this.DONT_COMPRESS_READ_NAMES) {
            this.compressReadName(read);
        }
        this.writerToUse.addAlignment(read);
    }

    /**
     * Debug check: compares the read against the reference bases spanning its
     * alignment and fails when more than 40% of a read longer than 20 bases
     * mismatches, provided the read carries an "RR" attribute and its name starts
     * with "Consensus".
     *
     * @param read the read to check
     * @throws ReviewedStingException if all of the above conditions hold
     */
    private void checkForHighMismatch(GATKSAMRecord read) {
        final int alignmentStart = read.getAlignmentStart();
        final int alignmentEnd = read.getAlignmentEnd();
        final byte[] referenceBases = this.getToolkit().getReferenceDataSource().getReference().getSubsequenceAt(read.getReferenceName(), alignmentStart, alignmentEnd).getBases();
        final int mismatches = SequenceUtil.countMismatches((SAMRecord)read, referenceBases, alignmentStart - 1);
        final int readLength = read.getReadLength();
        final double mismatchFraction = (double)mismatches / (double)readLength;

        final boolean tooManyMismatches = mismatchFraction > 0.4
                && readLength > 20
                && read.getAttribute("RR") != null
                && read.getReadName().startsWith("Consensus");
        if (!tooManyMismatches) {
            return;
        }
        throw new ReviewedStingException("BUG: High mismatch fraction found in read " + read.getReadName() + " position: " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd());
    }

    /**
     * Debug check: fails when the read's CIGAR validation returns a non-null result.
     *
     * @param read the read whose CIGAR is validated
     * @throws ReviewedStingException if {@code isValid} returns anything but {@code null}
     */
    private void checkCigar(GATKSAMRecord read) {
        final boolean cigarIsFine = read.getCigar().isValid(null, -1L) == null;
        if (cigarIsFine) {
            return;
        }
        throw new ReviewedStingException("BUG: cigar string is not valid: " + read.getCigarString());
    }

    /**
     * Replaces the read's name with a short numeric one.
     *
     * Each distinct original name is assigned the next sequential number the first
     * time it is seen, and the same number on every later occurrence. Reduced reads
     * get a "C" prefix in front of the number.
     *
     * @param read the read to rename in place
     */
    private void compressReadName(GATKSAMRecord read) {
        final String name = read.getReadName();
        final String prefix = read.isReducedRead() ? "C" : "";
        Long readNumber = this.readNameHash.get(name);
        if (readNumber == null) {
            // First sighting of this name: claim the next number and advance the counter.
            readNumber = this.nextReadNumber;
            this.readNameHash.put(name, readNumber);
            this.nextReadNumber = Long.valueOf(readNumber.longValue() + 1L);
        }
        read.setReadName(prefix + readNumber.toString());
    }

    /**
     * Tells whether {@code read} should be treated as the original read of
     * {@code list}: always true in whole-genome mode, otherwise true only when it
     * equals the list's first element.
     *
     * @param list the reads produced by {@code map} for one input read; must be
     *             non-empty when an interval list is in use
     * @param read the candidate read
     * @return whether {@code read} counts as the original read
     */
    private boolean isOriginalRead(LinkedList<GATKSAMRecord> list, GATKSAMRecord read) {
        if (this.isWholeGenome()) {
            return true;
        }
        final GATKSAMRecord head = list.getFirst();
        return head.equals(read);
    }

    /**
     * @return {@code true} when the interval list is empty, which this walker
     *         treats as running over the whole genome
     */
    private boolean isWholeGenome() {
        final boolean noIntervalsLeft = this.intervalList.isEmpty();
        return noIntervalsLeft;
    }

    /**
     * Downsampling mode selectable via {@code --downsample_strategy}; the chosen
     * value is handed unchanged to the compressor, which defines what each mode does.
     * (A nested enum is implicitly static, so the modifier is not repeated here.)
     */
    public enum DownsampleStrategy {
        /** The default mode. */
        Normal,
        /** The alternative mode. */
        Adaptive
    }
}

