/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.sting.gatk.walkers.variantutils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader;

@DocumentedGATKFeature(groupName="Variant Evaluation and Manipulation Tools", extraDocs={CommandLineGATK.class})
@Reference(window=@Window(start=0, stop=100))
public class VariantsToBinaryPed
extends RodWalker<Integer, Integer> {
    /** Variant input; its {@code variants} binding is the one read in {@code map} and matched by name in {@code initialize}. */
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    /** dbSNP argument collection; not referenced elsewhere in this class. */
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
    /** Sample metadata; a path ending in ".fam" is parsed as a fam file, anything else as "sample key=value;..." lines. */
    @Input(shortName="m", fullName="metaData", required=true, doc="Sample metadata file. You may specify a .fam file (in which case it will be copied to the file you provide as fam output).")
    File metaDataFile;
    /** Layout of the bed output; selects between writeIndividualMajor and writeSNPMajor. */
    @Input(shortName="mode", fullName="outputMode", required=false, doc="The output file mode (SNP major or individual major)")
    OutputMode mode = OutputMode.INDIVIDUAL_MAJOR;
    /** Binary genotype output: a three-byte header followed by the packed genotypes. */
    @Output(shortName="bed", fullName="bed", required=true, doc="output ped file")
    PrintStream outBed;
    /** One tab-separated line per emitted site (chromosome, id, scaled position, position, alleles). */
    @Output(shortName="bim", fullName="bim", required=true, doc="output map file")
    PrintStream outBim;
    /** One line per sample, either copied from the input .fam or built from the metadata. */
    @Output(shortName="fam", fullName="fam", required=true, doc="output fam file")
    PrintStream outFam;
    /** Genotypes whose quality falls below this threshold are encoded as no-call. */
    @Argument(shortName="mgq", fullName="minGenotypeQuality", required=true, doc="If genotype quality is lower than this value, output NO_CALL")
    int minGenotypeQuality = 0;
    /** When set, sites with AF > 0.5 have their alleles swapped in the bim line and their genotype codes flipped. */
    @Argument(fullName="majorAlleleFirst", required=false, doc="Sets the major allele to be 'reference' for the bim file, rather than the ref allele")
    boolean majorAlleleFirst = false;
    /** When set, site validation additionally calls validateAlternateAlleles(). */
    @Argument(fullName="checkAlternateAlleles", required=false, doc="Checks that alternate alleles actually appear in samples, erroring out if they do not")
    boolean checkAlternateAlleles = false;
    // Multiplied by the site start position to fill the third column of each bim line.
    private static double APPROX_CM_PER_BP = 1.3333333333333333;
    // Two-bit genotype codes. The encoding methods below use these same numeric
    // values as literals (0 hom-ref, 3 hom-var, 2 het, 1 no-call).
    private static final byte HOM_REF = 0;
    private static final byte HOM_VAR = 3;
    private static final byte HET = 2;
    private static final byte NO_CALL = 1;
    // Size in bytes of each per-sample buffer; the methods below use the literal 1000.
    private static final int BUFFER_SIZE = 1000;
    // Allele string written to the bim line for the "missing" side of an insertion or deletion.
    private static final String PLINK_DELETION_MARKER = "-";
    // Individual-major mode only: per-sample stream onto that sample's temporary file.
    private Map<String, OutputStream> printMap = new HashMap<String, OutputStream>();
    // Individual-major mode only: per-sample temporary file, re-read during the final merge.
    private Map<String, File> tempFiles = new HashMap<String, File>();
    // Individual-major mode only: per-sample buffer of packed genotype bytes not yet flushed.
    private Map<String, byte[]> genotypeBuffer = new HashMap<String, byte[]>();
    // Number of genotypes already packed into the current byte (wraps to 0 after four).
    private int genotypeCount = 0;
    // Index of the byte currently being filled.
    private int byteCount = 0;
    // Samples in the order they were encountered in the VCF header(s); defines bed sample order.
    private List<String> famOrder = new ArrayList<String>();
    // Running totals reported at the end of the traversal.
    private long totalByteCount = 0L;
    private long totalGenotypeCount = 0L;

    /**
     * Prepares all outputs before the traversal starts.
     *
     * Writes the bed header, parses the sample metadata, and then walks the
     * samples of the VCF header(s) in order. For every sample it
     * <ul>
     *   <li>writes a fam line (unless the metadata already is a .fam file, in
     *       which case the sample must simply be present in it),</li>
     *   <li>in individual-major mode, opens a temporary file and a byte buffer
     *       that collect this sample's packed genotypes, and</li>
     *   <li>appends the sample to {@code famOrder}.</li>
     * </ul>
     *
     * @throws UserException if a sample has no metadata or no phenotype
     * @throws ReviewedStingException if a temporary file cannot be created
     */
    @Override
    public void initialize() {
        this.writeBedHeader();
        Map<String, Map<String, String>> sampleMetaValues = this.parseMetaData();
        // Loop-invariant: whether the metadata was supplied as a ready-made .fam file.
        boolean famInput = this.metaDataFile.getAbsolutePath().endsWith(".fam");
        // Shared counter for the dummy family/parent IDs handed out below.
        int dummyID = 0;
        Map<String, VCFHeader> headers = GATKVCFUtils.getVCFHeadersFromRods(this.getToolkit());
        for (Map.Entry<String, VCFHeader> header : headers.entrySet()) {
            // Without a .fam file only the header of the variant input itself is considered.
            if (!famInput && !header.getKey().equals(this.variantCollection.variants.getName())) {
                continue;
            }
            for (String sample : header.getValue().getGenotypeSamples()) {
                if (!famInput) {
                    Map<String, String> mVals = sampleMetaValues.get(sample);
                    if (mVals == null) {
                        throw new UserException("No metadata provided for sample " + sample);
                    }
                    if (!mVals.containsKey("phenotype")) {
                        throw new UserException("No phenotype data provided for sample " + sample);
                    }
                    String fid = mVals.containsKey("fid") ? mVals.get("fid") : String.format("dummy_%d", ++dummyID);
                    String pid = mVals.containsKey("dad") ? mVals.get("dad") : String.format("dummy_%d", ++dummyID);
                    String mid = mVals.containsKey("mom") ? mVals.get("mom") : String.format("dummy_%d", ++dummyID);
                    String sex = mVals.containsKey("sex") ? mVals.get("sex") : "3";
                    String pheno = mVals.get("phenotype");
                    this.outFam.printf("%s\t%s\t%s\t%s\t%s\t%s%n", fid, sample, pid, mid, sex, pheno);
                } else if (!sampleMetaValues.containsKey(sample)) {
                    // The fam lines were already copied verbatim; a VCF sample that is
                    // missing from them would make bed and fam disagree.
                    throw new UserException("No metadata provided for sample " + sample);
                }
                if (this.mode == OutputMode.INDIVIDUAL_MAJOR) {
                    try {
                        File temp = File.createTempFile("VariantsToBPed_" + sample, ".tmp");
                        // Make sure the file does not outlive the JVM even if the
                        // traversal aborts before the temp files are merged.
                        temp.deleteOnExit();
                        this.printMap.put(sample, new PrintStream(temp));
                        this.tempFiles.put(sample, temp);
                    }
                    catch (IOException e) {
                        throw new ReviewedStingException("Error creating temporary file", e);
                    }
                    // One buffer of BUFFER_SIZE (1000) bytes per sample.
                    this.genotypeBuffer.put(sample, new byte[1000]);
                }
                this.famOrder.add(sample);
            }
        }
    }

    /**
     * Processes one locus: validates the variant, writes its bim line and
     * encodes its genotypes in the configured output mode.
     *
     * Sites are skipped (returning 0) when there is no tracker, no variant at
     * the locus, or the variant is filtered or not bi-allelic.
     *
     * @param tracker reference-ordered data at this locus; may be null
     * @param ref reference context handed to site validation
     * @param context alignment context; only its location is used
     * @return 1 if the site was written, 0 if it was skipped
     * @throws UserException if site validation reports an invalid record
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (tracker == null) {
            return 0;
        }
        VariantContext vc = tracker.getFirstValue(this.variantCollection.variants, context.getLocation());
        if (vc == null || vc.isFiltered() || !vc.isBiallelic()) {
            return 0;
        }
        try {
            this.validateVariantSite(vc, ref, context);
        }
        catch (TribbleException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new UserException("Input VCF file is invalid; we cannot guarantee the resulting ped file. Please run ValidateVariants for more detailed information. This error is: " + e.getMessage(), e);
        }
        String vcRef = this.getReferenceAllele(vc);
        String vcAlt = this.getAlternateAllele(vc);
        // The alternate allele is only treated as "first" when requested and when AF > 0.5.
        boolean altMajor = false;
        if (this.majorAlleleFirst) {
            HashMap<String, Object> ats = new HashMap<String, Object>(vc.getAttributes());
            if (!vc.hasAttribute("AF")) {
                // No AF annotation in the record: derive it from the genotypes.
                VariantContextUtils.calculateChromosomeCounts(vc, ats, true);
            }
            altMajor = this.getAF(ats.get("AF")) > 0.5;
        }
        String refOut = altMajor ? vcAlt : vcRef;
        String altOut = altMajor ? vcRef : vcAlt;
        this.outBim.printf("%s\t%s\t%.2f\t%d\t%s\t%s%n", vc.getChr(), VariantsToBinaryPed.getID(vc), APPROX_CM_PER_BP * (double)vc.getStart(), vc.getStart(), refOut, altOut);
        if (this.mode == OutputMode.INDIVIDUAL_MAJOR) {
            this.writeIndividualMajor(vc, altMajor);
        } else {
            this.writeSNPMajor(vc, altMajor);
        }
        return 1;
    }

    /**
     * Adds one site to the per-sample buffers used in individual-major mode.
     *
     * Each genotype of the site is OR-ed into the current byte of its sample's
     * buffer at the two-bit slot given by {@code genotypeCount}. After four
     * sites the byte index advances; once the buffers are full they are
     * written to the per-sample temporary streams and replaced by fresh ones.
     *
     * @param vc the variant whose genotypes are encoded
     * @param altMajor whether the flipped (alt-as-first-allele) encoding is used
     * @throws ReviewedStingException if a buffer cannot be written to its temporary stream
     */
    public void writeIndividualMajor(VariantContext vc, boolean altMajor) {
        for (Genotype genotype : vc.getGenotypes()) {
            this.totalGenotypeCount++;
            String sampleName = genotype.getSampleName();
            byte[] sampleBuffer = this.genotypeBuffer.get(sampleName);
            byte code = this.getEncoding(genotype, this.genotypeCount, altMajor);
            sampleBuffer[this.byteCount] |= code;
        }
        this.genotypeCount++;
        if (this.genotypeCount % 4 != 0) {
            // Current byte still has free two-bit slots.
            return;
        }
        this.byteCount++;
        if (this.byteCount >= 1000) {
            // Buffers are full: spill every sample's buffer to its temp stream.
            for (Map.Entry<String, OutputStream> entry : this.printMap.entrySet()) {
                String sampleName = entry.getKey();
                try {
                    entry.getValue().write(this.genotypeBuffer.get(sampleName));
                }
                catch (IOException e) {
                    throw new ReviewedStingException("Error writing to temporary bed file.", e);
                }
                this.genotypeBuffer.put(sampleName, new byte[1000]);
            }
            this.byteCount = 0;
        }
        this.genotypeCount = 0;
    }

    /**
     * Writes one site to the bed output in SNP-major layout.
     *
     * The genotypes are taken in {@code famOrder} and packed four to a byte
     * (two bits each, first sample in the lowest bits); the resulting
     * {@code ceil(samples / 4)} bytes are written straight to {@code outBed}.
     *
     * @param vc the variant whose genotypes are encoded
     * @param altMajor whether the flipped (alt-as-first-allele) encoding is used
     * @throws ReviewedStingException if the bytes cannot be written
     */
    public void writeSNPMajor(VariantContext vc, boolean altMajor) {
        this.genotypeCount = 0;
        this.byteCount = 0;
        // Integer division rounds down, so (3 + n) / 4 is ceil(n / 4).
        byte[] bytes = new byte[(3 + this.famOrder.size()) / 4];
        for (Genotype g : vc.getGenotypesOrderedBy(this.famOrder)) {
            byte enc = this.getEncoding(g, this.genotypeCount, altMajor);
            bytes[this.byteCount] |= enc;
            ++this.genotypeCount;
            if (this.genotypeCount % 4 == 0) {
                // Only move to the next byte once all four two-bit slots are used;
                // advancing per genotype would scatter the codes and overrun the array.
                ++this.byteCount;
                this.genotypeCount = 0;
            }
        }
        this.totalGenotypeCount += (long)this.famOrder.size();
        this.totalByteCount += (long)bytes.length;
        try {
            this.outBed.write(bytes);
        }
        catch (IOException e) {
            throw new ReviewedStingException("Error writing to output bed file", e);
        }
    }

    /**
     * Accumulates the per-site results of {@code map}.
     *
     * @param m the value returned by {@code map} for one site
     * @param r the running total so far
     * @return the sum of both values
     */
    @Override
    public Integer reduce(Integer m, Integer r) {
        int total = r.intValue() + m.intValue();
        return Integer.valueOf(total);
    }

    /**
     * Supplies the starting value for the site count.
     *
     * @return zero
     */
    @Override
    public Integer reduceInit() {
        return Integer.valueOf(0);
    }

    /**
     * Finishes the output once all sites have been visited.
     *
     * In individual-major mode the per-sample temporary files are merged into
     * the bed output first; the summary is logged afterwards because the byte
     * total is only accumulated during that merge.
     *
     * @param numSites number of sites that were written (sum of the {@code map} results)
     */
    @Override
    public void onTraversalDone(Integer numSites) {
        if (this.mode == OutputMode.INDIVIDUAL_MAJOR) {
            this.mergeGenotypeTempFiles(numSites);
        }
        logger.info(String.format("%d sites processed for a total of %d genotypes encoded in %d bytes", numSites, this.totalGenotypeCount, this.totalByteCount));
    }

    /**
     * Concatenates the per-sample temporary files into the bed output
     * (individual-major mode).
     *
     * First the bytes still held in the in-memory buffers are flushed to the
     * temporary streams, which are then closed. Afterwards each sample's file
     * is copied to {@code outBed} in {@code famOrder}, and finally the
     * temporary files are removed.
     *
     * @param numSites number of sites written; determines how many bytes each sample contributes
     * @throws ReviewedStingException on any I/O failure, or if a temporary file
     *         holds fewer bytes than the site count requires
     */
    private void mergeGenotypeTempFiles(int numSites) {
        // A partially filled byte (genotypeCount > 0) still has to be written out.
        int partialByte = this.genotypeCount > 0 ? 1 : 0;
        int pending = this.byteCount + partialByte;
        for (String sample : this.printMap.keySet()) {
            OutputStream tempOut = this.printMap.get(sample);
            try {
                tempOut.write(this.genotypeBuffer.get(sample), 0, pending);
            }
            catch (IOException e) {
                throw new ReviewedStingException("Error writing to temporary bed file.", e);
            }
            try {
                tempOut.close();
            }
            catch (IOException e) {
                throw new ReviewedStingException("Error closing temporary file.", e);
            }
        }
        // Four sites per byte, plus the trailing partial byte if there is one.
        int bytesPerSample = numSites / 4 + partialByte;
        // Copy buffer; same size as the write-side buffers (BUFFER_SIZE, 1000 bytes).
        byte[] chunk = new byte[1000];
        for (String sample : this.famOrder) {
            FileInputStream inStream;
            logger.info("Merging genotypes for " + sample);
            try {
                inStream = new FileInputStream(this.tempFiles.get(sample));
            }
            catch (IOException e) {
                throw new ReviewedStingException("Error opening temp file for input.", e);
            }
            try {
                int remaining = bytesPerSample;
                while (remaining > 0) {
                    // read() may return fewer bytes than requested, so only forward
                    // what was actually read and keep going until done.
                    int read = inStream.read(chunk, 0, Math.min(chunk.length, remaining));
                    if (read < 0) {
                        throw new ReviewedStingException("Temporary genotype file for sample " + sample + " is " + remaining + " bytes shorter than expected.");
                    }
                    this.outBed.write(chunk, 0, read);
                    this.totalByteCount += (long)read;
                    remaining -= read;
                }
            }
            catch (IOException e) {
                throw new ReviewedStingException("Error reading from temp file for input.", e);
            }
            finally {
                try {
                    inStream.close();
                }
                catch (IOException ignored) {
                    // Nothing was written through this stream; a failed close loses no data.
                }
            }
        }
        // Remove the temp files only after every sample has been merged, since
        // famOrder may name the same sample (and therefore the same file) twice.
        for (File tempFile : this.tempFiles.values()) {
            if (!tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }

    /**
     * Encodes a genotype as a two-bit code shifted to the given slot of a byte.
     *
     * @param g the genotype to encode
     * @param offset slot index within the byte; the code is shifted left by {@code 2 * offset} bits
     * @param altMajor if true the flipped encoding is used, otherwise the standard one
     * @return the shifted genotype code
     */
    private byte getEncoding(Genotype g, int offset, boolean altMajor) {
        return altMajor ? this.getFlippedEncoding(g, offset) : this.getStandardEncoding(g, offset);
    }

    /**
     * Encodes a genotype with the reference allele as the first allele:
     * hom-ref is 0, het is 2, hom-var is 3, and anything else (including a
     * genotype that fails the quality check) is 1.
     *
     * @param g the genotype to encode
     * @param offset slot index within the byte
     * @return the two-bit code shifted left by {@code 2 * offset} bits
     */
    private byte getStandardEncoding(Genotype g, int offset) {
        int code;
        if (!this.checkGQIsGood(g)) {
            code = 1;
        } else if (g.isHomRef()) {
            code = 0;
        } else if (g.isHomVar()) {
            code = 3;
        } else if (g.isHet()) {
            code = 2;
        } else {
            code = 1;
        }
        int shift = 2 * offset;
        return (byte)(code << shift);
    }

    /**
     * Encodes a genotype with the alternate allele as the first allele, i.e.
     * with the homozygous codes swapped: hom-ref is 3, hom-var is 0, het is 2,
     * and anything else (including a genotype that fails the quality check) is 1.
     *
     * @param g the genotype to encode
     * @param offset slot index within the byte
     * @return the two-bit code shifted left by {@code 2 * offset} bits
     */
    private byte getFlippedEncoding(Genotype g, int offset) {
        int code;
        if (!this.checkGQIsGood(g)) {
            code = 1;
        } else if (g.isHomRef()) {
            code = 3;
        } else if (g.isHomVar()) {
            code = 0;
        } else if (g.isHet()) {
            code = 2;
        } else {
            code = 1;
        }
        int shift = 2 * offset;
        return (byte)(code << shift);
    }

    /**
     * Decides whether a genotype meets the minimum genotype quality.
     *
     * An explicit GQ is compared directly. Without one, a quality is derived
     * from the genotype likelihoods if present. With neither, the genotype
     * passes only when the threshold is zero or negative.
     *
     * @param genotype the genotype to check
     * @return true if the genotype's quality is at least {@code minGenotypeQuality}
     */
    private boolean checkGQIsGood(Genotype genotype) {
        if (!genotype.hasGQ()) {
            if (!genotype.hasLikelihoods()) {
                // No quality information at all.
                return this.minGenotypeQuality <= 0;
            }
            double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal() - 1, genotype.getLikelihoods().getAsVector());
            return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= (double)this.minGenotypeQuality;
        }
        return genotype.getGQ() >= this.minGenotypeQuality;
    }

    /**
     * Returns the identifier written to the bim line for a variant.
     *
     * @param v the variant
     * @return the variant's own ID if it has one, otherwise a generated
     *         {@code Var-<chr>-<start>} name
     */
    private static String getID(VariantContext v) {
        return v.hasID() ? v.getID() : String.format("Var-%s-%d", v.getChr(), v.getStart());
    }

    /**
     * Converts an "AF" attribute value to a double.
     *
     * @param o the attribute value; accepted as a {@code Double} or a parseable {@code String}
     * @return the allele frequency
     * @throws UserException if the value is of any other type (including null)
     */
    private double getAF(Object o) {
        if (o instanceof Double) {
            return ((Double)o).doubleValue();
        }
        if (o instanceof String) {
            return Double.parseDouble((String)o);
        }
        throw new UserException("Allele frequency appears to be neither String nor Double. Please check the header of your VCF.");
    }

    /**
     * Writes the three-byte header of the bed output: the two fixed bytes
     * 0x6C 0x1B followed by a mode byte that is 0 for individual-major and
     * 1 for SNP-major output.
     *
     * @throws ReviewedStingException if the header cannot be written
     */
    private void writeBedHeader() {
        byte modeByte = (byte)(this.mode != OutputMode.INDIVIDUAL_MAJOR ? 1 : 0);
        try {
            this.outBed.write(new byte[]{(byte)0x6C, (byte)0x1B, modeByte});
        }
        catch (IOException e) {
            // Pass the cause along so the underlying I/O failure stays visible.
            throw new ReviewedStingException("error writing to output file.", e);
        }
    }

    /**
     * Reads the sample metadata file into a map from sample ID to its
     * key/value attributes ("fid", "dad", "mom", "sex", "phenotype", ...).
     *
     * Two input formats are supported, chosen by file name:
     * <ul>
     *   <li>a path ending in ".fam": six whitespace-separated columns per line
     *       (family, sample, father, mother, sex, phenotype); every line is
     *       also copied verbatim to the fam output;</li>
     *   <li>anything else: {@code <sample> <key>=<value>;<key>=<value>...}
     *       per line.</li>
     * </ul>
     *
     * @return metadata per sample ID
     * @throws UserException if the file is missing or a line is malformed
     */
    private Map<String, Map<String, String>> parseMetaData() {
        HashMap<String, Map<String, String>> metaValues = new HashMap<String, Map<String, String>>();
        logger.debug("Reading in metadata...");
        try {
            if (this.metaDataFile.getAbsolutePath().endsWith(".fam")) {
                for (String line : new XReadLines(this.metaDataFile)) {
                    String[] famSplit = line.split("\\s+");
                    if (famSplit.length != 6) {
                        throw new UserException("Line of the fam file is malformatted. Expected 6 entries. Line is " + line);
                    }
                    HashMap<String, String> values = new HashMap<String, String>();
                    values.put("fid", famSplit[0]);
                    // Column 3 is the father and column 4 the mother, matching the
                    // order in which initialize() writes generated fam lines.
                    values.put("dad", famSplit[2]);
                    values.put("mom", famSplit[3]);
                    values.put("sex", famSplit[4]);
                    values.put("phenotype", famSplit[5]);
                    metaValues.put(famSplit[1], values);
                    this.outFam.printf("%s%n", line);
                }
            } else {
                for (String line : new XReadLines(this.metaDataFile)) {
                    logger.debug(line);
                    String[] split = line.split("\\s+");
                    if (split.length < 2) {
                        // Report the offending line instead of failing with an index error.
                        throw new UserException("Line of the metadata file is malformatted. Expected a sample ID followed by key=value pairs. Line is " + line);
                    }
                    String sampleID = split[0];
                    HashMap<String, String> values = new HashMap<String, String>();
                    for (String kvp : split[1].split(";")) {
                        String[] kvpSplit = kvp.split("=");
                        if (kvpSplit.length < 2) {
                            throw new UserException("Metadata entry '" + kvp + "' is not of the form key=value. Line is " + line);
                        }
                        values.put(kvpSplit[0], kvpSplit[1]);
                    }
                    metaValues.put(sampleID, values);
                }
            }
        }
        catch (FileNotFoundException e) {
            throw new UserException("Meta data file not found: " + this.metaDataFile.getAbsolutePath(), e);
        }
        return metaValues;
    }

    /**
     * Checks the record's reference allele against the reference bases and,
     * if {@code checkAlternateAlleles} is set, validates its alternate alleles.
     *
     * Records whose reference allele is longer than 100 bases are not
     * validated: a message is logged and the method returns normally, so the
     * caller carries on with the record.
     *
     * @param vc the variant to validate
     * @param ref reference context supplying the observed bases
     * @param context alignment context (unused here)
     */
    private void validateVariantSite(VariantContext vc, ReferenceContext ref, AlignmentContext context) {
        Allele reportedRefAllele = vc.getReference();
        int refLength = reportedRefAllele.length();
        if (refLength <= 100) {
            // Compare the reported allele with the first refLength reference bases.
            byte[] observedRefBases = new byte[refLength];
            System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
            Allele observedRefAllele = Allele.create(observedRefBases);
            vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
            if (this.checkAlternateAlleles) {
                vc.validateAlternateAlleles();
            }
            return;
        }
        logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
    }

    /**
     * Returns the reference-allele string written to the bim line.
     *
     * @param vc a bi-allelic variant
     * @return "-" for a simple insertion, "1" for a symbolic variant, the
     *         reference bases without their first base for a simple deletion,
     *         and the full reference bases otherwise
     */
    private String getReferenceAllele(VariantContext vc) {
        String allele;
        if (vc.isSimpleInsertion()) {
            allele = PLINK_DELETION_MARKER;
        } else if (vc.isSymbolic()) {
            allele = "1";
        } else if (vc.isSimpleDeletion()) {
            // Drop the leading base of the reference allele.
            allele = vc.getReference().getBaseString().substring(1);
        } else {
            allele = vc.getReference().getBaseString();
        }
        return allele;
    }

    /**
     * Returns the alternate-allele string written to the bim line.
     *
     * @param vc a bi-allelic variant
     * @return the alternate bases without their first base for a simple
     *         insertion, "2" for a symbolic variant, "-" for a simple
     *         deletion, and the full alternate bases otherwise
     */
    private String getAlternateAllele(VariantContext vc) {
        String allele;
        if (vc.isSimpleInsertion()) {
            // Drop the leading base of the alternate allele.
            allele = vc.getAlternateAllele(0).getBaseString().substring(1);
        } else if (vc.isSymbolic()) {
            allele = "2";
        } else if (vc.isSimpleDeletion()) {
            allele = PLINK_DELETION_MARKER;
        } else {
            allele = vc.getAlternateAllele(0).getBaseString();
        }
        return allele;
    }

    /**
     * Layout of the bed output, selected with the {@code outputMode} argument.
     * INDIVIDUAL_MAJOR writes header mode byte 0 and emits all genotypes of one
     * sample before the next sample; SNP_MAJOR writes header mode byte 1 and
     * emits all samples of one site before the next site.
     */
    static enum OutputMode {
        INDIVIDUAL_MAJOR,
        SNP_MAJOR;

    }
}

