/*
 * Decompiled with CFR 0.152.
 */
package projects.talecorrect;

import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DNADataSet;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.alphabets.GenericComplementableDiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.tools.ui.cli.CLI;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.TreeMap;

public class CorrectTALESequences
implements JstacsTool {
    /**
     * Command-line entry point: wraps a new {@code CorrectTALESequences} tool in a Jstacs
     * {@link CLI} and hands it the raw argument vector.
     *
     * @param args command-line arguments, passed to {@code CLI.run} unchanged
     * @throws Exception whatever {@code CLI.run} throws
     */
    public static void main(String[] args) throws Exception {
        new CLI(new CorrectTALESequences()).run(args);
    }

    /**
     * Declares the four required file inputs of the tool, in the order in which
     * {@code run} reads them by index: the FASTA sequences (0), then the nHMMER outputs for
     * the N-terminus (1), the repeats (2) and the C-terminus (3).
     *
     * @return the parameter set named after {@link #getShortName()}, or {@code null} if
     *         creating one of the file parameters failed (the stack trace is printed)
     */
    @Override
    public ToolParameterSet getToolParameters() {
        Parameter[] fileParameters;
        try {
            fileParameters = new Parameter[]{
                new FileParameter("Sequences", "File with TALE-containing ONT assembly or extracted TALE sequences", "fasta,fa", true),
                new FileParameter("N-terminus nHMMER File", "The output of nHMMER N-terminus", "txt", true),
                new FileParameter("Repeats nHMMER File", "The output of nHMMER repeats", "txt", true),
                new FileParameter("C-terminus nHMMER File", "The output of nHMMER C-terminus", "txt", true)
            };
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return new ToolParameterSet(this.getShortName(), fileParameters);
    }

    /**
     * Applies the insertion/deletion corrections derived from three nHMMER outputs to the
     * input sequences and assembles the four text results of the tool.
     *
     * <p>Steps:
     * <ol>
     *   <li>Read the FASTA file (parameter 0) and index the sequences by the first
     *       space-separated token of their header.</li>
     *   <li>Collect candidate corrections from the N-terminus, repeat and C-terminus nHMMER
     *       files (parameters 1-3) via {@code searchForCorrections}, using a minimum accuracy
     *       of 0.75, minimum scores of 200/45/200 and maximum contiguous run lengths of
     *       3/2/3.</li>
     *   <li>Per sequence, apply the corrections from the highest position downwards, so that
     *       an edit never shifts the coordinates of the edits still to be applied. Inserted
     *       bases are written in lower case.</li>
     *   <li>Scan every corrected sequence for lower-case bases, list them, and emit one
     *       {@code igvtools count} command per run of consecutive positions.</li>
     * </ol>
     *
     * <p>Changes against the previous implementation: the last run of lower-case positions
     * of each sequence is now written to the igvtools script (it used to be dropped, so a
     * sequence with a single insertion produced no command), and the scan no longer skips
     * the base immediately following a hit (adjacent identical inserted bases were missed).
     *
     * @param parameters the tool parameters as declared by {@code getToolParameters}
     * @param protocol   receives a short progress message before the results are built
     * @param progress   not used
     * @param threads    not used
     * @return a result holding the corrected FASTA, the insertion list, the igvtools shell
     *         script and the list of applied corrections
     * @throws Exception if reading or parsing any input fails
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads) throws Exception {
        FileParameter.FileRepresentation nanoAssemblyWithTALEs = ((FileParameter)parameters.getParameterAt(0)).getFileContents();
        FileParameter.FileRepresentation nHmmerNFile = ((FileParameter)parameters.getParameterAt(1)).getFileContents();
        FileParameter.FileRepresentation nHmmerRFile = ((FileParameter)parameters.getParameterAt(2)).getFileContents();
        FileParameter.FileRepresentation nHmmerCFile = ((FileParameter)parameters.getParameterAt(3)).getFileContents();
        int countDifferneces_commonChar_vs_HomopolymerSubstitution = 0;
        SimpleSequenceAnnotationParser parser = new SimpleSequenceAnnotationParser();
        System.out.println();
        DNADataSet ds = new DNADataSet(nanoAssemblyWithTALEs.getFilename(), '>', parser);

        // Case-sensitive alphabet: corrected sequences keep inserted bases in lower case.
        String[] symbols_caseSensitive = new String[]{"A", "C", "G", "T", "a", "c", "g", "t"};
        AlphabetContainer conCaseSensitive = new AlphabetContainer((Alphabet)new DiscreteAlphabet(false, symbols_caseSensitive));
        // Gap-aware complementable alphabet used for the nHMMER alignment rows:
        // A<->T, C<->G, and the gap symbol is its own complement.
        String[] symbols = new String[]{"A", "C", "G", "T", "-"};
        int[] revComp = new int[]{3, 2, 1, 0, 4};
        GenericComplementableDiscreteAlphabet abc2 = new GenericComplementableDiscreteAlphabet(true, symbols, revComp);
        AlphabetContainer con2 = new AlphabetContainer((Alphabet)abc2);

        HashMap<String, Sequence> seqTALEs = new HashMap<String, Sequence>();
        for (int i = 0; i < ds.getNumberOfElements(); ++i) {
            Sequence seq = ds.getElementAt(i);
            String header = seq.getAnnotation()[0].getResultAt(0).getValue().toString();
            String TALEname = header.split(" ")[0];
            seqTALEs.put(TALEname, seq);
        }

        ArrayList<nHMMERCorrection> correctionList = new ArrayList<nHMMERCorrection>();
        CorrectTALESequences.searchForCorrections(nHmmerNFile.getFilename(), 0.75, 200, seqTALEs, correctionList, 3, con2, "N");
        CorrectTALESequences.searchForCorrections(nHmmerRFile.getFilename(), 0.75, 45, seqTALEs, correctionList, 2, con2, "R");
        CorrectTALESequences.searchForCorrections(nHmmerCFile.getFilename(), 0.75, 200, seqTALEs, correctionList, 3, con2, "C");
        System.out.println("candidate Corrections:");
        System.out.println(correctionList.toString());

        StringBuilder subList = new StringBuilder();
        subList.append("seqName\tposition in uncorrected sequences\ttype\tsubstitution\n");
        for (String seqName : seqTALEs.keySet()) {
            // position -> index into correctionList; a later correction at the same
            // position overwrites an earlier one.
            TreeMap<Integer, Integer> sortedPos_index = new TreeMap<Integer, Integer>();
            int index = 0;
            for (nHMMERCorrection nHcorr : correctionList) {
                if (nHcorr.getSeqName().equals(seqName)) {
                    sortedPos_index.put(nHcorr.getPosition(), index);
                }
                ++index;
            }
            // Highest position first, so earlier coordinates stay valid while editing.
            for (Integer pos : sortedPos_index.descendingKeySet()) {
                nHMMERCorrection correction = correctionList.get(sortedPos_index.get(pos));
                Sequence current = seqTALEs.get(seqName);
                System.out.println(correction.toString());
                Sequence seqCorrected = null;
                System.out.println("pos: " + pos);
                if (correction.getType() == 'i') {
                    // NOTE(review): the fixed 10/11/20-base windows below assume the
                    // correction lies at least 11 bases from the sequence start and 10
                    // bases from its end - confirm against Sequence.getSubSequence.
                    int correctionPosition = correction.getCorrectionPosition();
                    String seqBefore = current.getSubSequence(0, correctionPosition - 1).toString();
                    System.out.println("seq: " + current.getSubSequence(correctionPosition - 10, 20).toString());
                    String shortSeqBefore = current.getSubSequence(correctionPosition - 11, 10).toString();
                    String homopolymerBefore = CorrectTALESequences.getHomopolymerBefore(shortSeqBefore.toUpperCase());
                    String seqAfter = current.getSubSequence(correctionPosition - 1).toString();
                    String shortSeqAfter = current.getSubSequence(correctionPosition - 1, 10).toString();
                    String homopolymerAfter = CorrectTALESequences.getHomopolymerAfter(shortSeqAfter.toUpperCase());

                    // If both flanks consist of the same base they form one homopolymer;
                    // otherwise take the longer flank, preferring the left one on a tie.
                    String longestHomopoylmer;
                    if (homopolymerAfter.charAt(0) == homopolymerBefore.charAt(0)) {
                        longestHomopoylmer = homopolymerAfter + homopolymerBefore;
                    } else if (homopolymerAfter.length() > homopolymerBefore.length()) {
                        longestHomopoylmer = homopolymerAfter;
                    } else {
                        longestHomopoylmer = homopolymerBefore;
                    }
                    if (longestHomopoylmer.charAt(0) != correction.getToChar()) {
                        System.err.println("Difference: HomopolymerChar (" + longestHomopoylmer.charAt(0) + ") vs. CommonNucl (" + correction.getToChar() + ")");
                        ++countDifferneces_commonChar_vs_HomopolymerSubstitution;
                    }
                    // The consensus base is inserted unless the insertion sits in a
                    // homopolymer of at least 5 bases, in which case that base wins.
                    char substitution = correction.getToChar();
                    if (longestHomopoylmer.length() >= 5) {
                        substitution = longestHomopoylmer.charAt(0);
                    }
                    substitution = Character.toLowerCase(substitution);
                    subList.append(seqName + "\t" + pos + "\t" + "insertion" + "\t" + correction.getFromChar() + " -> " + substitution + "\n");
                    System.out.println("longestHomopoylmer: " + longestHomopoylmer);
                    System.out.println("--seq Parts--");
                    System.out.println("seqStart:");
                    System.out.println(seqBefore.substring(seqBefore.length() - 10));
                    System.out.println("insertion");
                    System.out.println(substitution);
                    System.out.println("seqEnd");
                    System.out.println(seqAfter.substring(0, 10));
                    System.out.println();
                    seqCorrected = Sequence.create(conCaseSensitive, seqBefore + substitution + seqAfter);
                } else if (correction.getType() == 'd') {
                    subList.append(seqName + "\t" + pos + "\t" + "deletion" + "\t" + correction.getFromChar() + " -> " + "-" + "\n");
                    // Drop the base at 1-based position pos.
                    String seqBefore = current.getSubSequence(0, pos - 1).toString();
                    String seqAfter = current.getSubSequence(pos).toString();
                    seqCorrected = Sequence.create(conCaseSensitive, seqBefore + seqAfter);
                    System.out.println("deletion");
                    System.out.println();
                }
                // Only 'i' and 'd' are produced in this file; never store null for
                // anything else.
                if (seqCorrected != null) {
                    seqTALEs.replace(seqName, seqCorrected);
                }
            }
        }

        StringBuilder correctedSequences = new StringBuilder();
        StringBuilder insertionList = new StringBuilder();
        StringBuilder igvToolsCountScript = new StringBuilder();
        igvToolsCountScript.append("#!/bin/bash\n");
        igvToolsCountScript.append("assembly=$1\n");
        igvToolsCountScript.append("bamFile=$2\n");
        igvToolsCountScript.append("pathFiles=$3\n");
        ArrayList<String> sortedKeys = new ArrayList<String>(seqTALEs.keySet());
        Collections.sort(sortedKeys);
        int seqCounter = 0;
        for (String seqName : sortedKeys) {
            ++seqCounter;
            correctedSequences.append(">" + seqName + "\n");
            String correctedSeq = seqTALEs.get(seqName).toString();
            int countWigFiles = 0;
            // Current run of consecutive lower-case positions (1-based); runStart < 0
            // means that no run is open yet.
            int runStart = -1;
            int runEnd = -1;
            for (char sub : new char[]{'a', 'c', 'g', 't'}) {
                int searchFrom = 0;
                while (searchFrom < correctedSeq.length()) {
                    int pos = correctedSeq.indexOf(sub, searchFrom) + 1;
                    if (pos < 1) break;
                    insertionList.append(seqName + "\t" + pos + "\t" + sub + "\n");
                    if (runStart < 0) {
                        runStart = pos;
                    } else if (pos != runEnd + 1) {
                        CorrectTALESequences.appendIgvToolsCount(igvToolsCountScript, seqName, seqCounter, ++countWigFiles, runStart, runEnd);
                        runStart = pos;
                    }
                    runEnd = pos;
                    // pos is 1-based, i.e. already the 0-based index of the next base.
                    searchFrom = pos;
                }
            }
            if (runStart >= 0) {
                // Flush the run that is still open when the scan ends.
                CorrectTALESequences.appendIgvToolsCount(igvToolsCountScript, seqName, seqCounter, ++countWigFiles, runStart, runEnd);
            }
            correctedSequences.append(correctedSeq + "\n");
        }
        System.out.println("countDifferneces commonChar vs HomopolymerSubstitution: " + countDifferneces_commonChar_vs_HomopolymerSubstitution);
        protocol.append("\nWriting outputs.\n");
        TextResult fr1 = new TextResult("correctedTALEs", "Output with corrected sequences.", new FileParameter.FileRepresentation("", correctedSequences.toString()), "fa", this.getToolName(), "fasta/dna", true);
        TextResult fr2 = new TextResult("insertionList_inCorrectedFile", "List of insertions with positions in corrected sequences.", new FileParameter.FileRepresentation("", insertionList.toString()), "tsv", this.getToolName(), "tsv", true);
        TextResult fr3 = new TextResult("igvtools.count.Substitution.script", "Shell script to use for substition polishing.", new FileParameter.FileRepresentation("", igvToolsCountScript.toString()), "sh", this.getToolName(), "sh", true);
        TextResult fr4 = new TextResult("substitionList", "List of substitutions with positions within uncorrected sequences.", new FileParameter.FileRepresentation("", subList.toString()), "tsv", this.getToolName(), "tsv", true);
        ResultSet set = new ResultSet(new Result[][]{{fr1, fr2, fr3, fr4}});
        return new ToolResult("Result of " + this.getToolName(), this.getToolName() + " on \"" + nanoAssemblyWithTALEs.getFilename() + "\"", null, set, parameters, this.getToolName(), new Date(System.currentTimeMillis()));
    }

    /**
     * Appends the two shell lines that run {@code igvtools count} on one region: the
     * assignment of the per-region wig file name and the command itself.
     */
    private static void appendIgvToolsCount(StringBuilder script, String seqName, int seqCounter, int wigFileNumber, int firstPos, int lastPos) {
        script.append("wigFile=$pathFiles\"out.igvtools.count." + seqCounter + "_" + wigFileNumber + ".wig\"\n");
        script.append("igvtools count -w 1 --bases --minMapQuality 1 --query=\"" + seqName + ":" + firstPos + "-" + lastPos + "\" $bamFile $wigFile $assembly\n");
    }

    /**
     * Reads {@code numLines} lines from the reader and returns the last one read.
     *
     * @param BR       the reader to advance
     * @param numLines the number of lines to consume
     * @return the last line read; the empty string if {@code numLines} is not positive;
     *         {@code null} if the end of the stream was reached on the last read
     * @throws IOException if reading fails
     */
    private static String skipLinesBufferedReader(BufferedReader BR, int numLines) throws IOException {
        String lastRead = "";
        for (int remaining = numLines; remaining > 0; --remaining) {
            lastRead = BR.readLine();
        }
        return lastRead;
    }

    /**
     * Returns the run of identical characters that ends the given string, e.g.
     * {@code "TT"} for {@code "ACGTT"}.
     *
     * <p>The backwards scan stops at the start of the string, so an input consisting of a
     * single repeated character is returned whole (the previous version ran past index 0
     * and threw {@code StringIndexOutOfBoundsException}).
     *
     * @param SeqBeforeInsertion the sequence window ending directly before the insertion
     * @return the trailing homopolymer, or the empty string for empty input
     */
    private static String getHomopolymerBefore(String SeqBeforeInsertion) {
        if (SeqBeforeInsertion.isEmpty()) {
            return "";
        }
        int start = SeqBeforeInsertion.length() - 1;
        char charAtEnd = SeqBeforeInsertion.charAt(start);
        // Extend to the left while the preceding character still matches.
        while (start > 0 && SeqBeforeInsertion.charAt(start - 1) == charAtEnd) {
            --start;
        }
        return SeqBeforeInsertion.substring(start);
    }

    /**
     * Returns the run of identical characters that starts the given string, e.g.
     * {@code "AA"} for {@code "AAC"}.
     *
     * <p>The bound is checked before the character is read, so an input consisting of a
     * single repeated character is returned whole (the previous version read one past the
     * end and threw {@code StringIndexOutOfBoundsException}).
     *
     * @param SeqAfterInsertion the sequence window starting directly at the insertion
     * @return the leading homopolymer, or the empty string for empty input
     */
    private static String getHomopolymerAfter(String SeqAfterInsertion) {
        if (SeqAfterInsertion.isEmpty()) {
            return "";
        }
        char charAtStart = SeqAfterInsertion.charAt(0);
        int end = 1;
        // Extend to the right while the next character still matches.
        while (end < SeqAfterInsertion.length() && SeqAfterInsertion.charAt(end) == charAtStart) {
            ++end;
        }
        return SeqAfterInsertion.substring(0, end);
    }

    /**
     * Parses the alignment section of an nHMMER text output and appends candidate
     * single-base corrections to {@code correctionList}.
     *
     * <p>Everything up to the line starting with
     * {@code "Annotation for each hit  (and alignments):"} is skipped. For every hit
     * ({@code ">>"} line) the statistics row three lines below is read; the hit is used only
     * if its accuracy (column 14) is at least {@code minAcc} and its score (column 1) is at
     * least {@code minScore}. For every alignment block of an accepted hit:
     * <ul>
     *   <li><b>Insertions</b> ({@code 'i'}): columns where the posterior-probability row
     *       shows {@code '.'} and the target row shows {@code '-'}. Runs of more than
     *       {@code maxConitgousCorrection} adjacent gap columns are discarded. The base to
     *       insert is the consensus base of that column, complemented for hits on the
     *       reverse strand.</li>
     *   <li><b>Deletions</b> ({@code 'd'}): reported only if the consensus row of the block
     *       consists of lower-case {@code acgt} with exactly one {@code '.'}.</li>
     * </ul>
     * Parsing stops at {@code "Internal pipeline statistics summary"} or at the end of the
     * file.
     *
     * <p>Changes against the previous implementation:
     * <ul>
     *   <li>The reader is closed even if parsing throws.</li>
     *   <li>{@code type} is compared with {@code equals} instead of {@code ==}.</li>
     *   <li>Over-long gap runs are now removed exactly. The old index arithmetic removed
     *       the element following the run and kept the first element of the run whenever
     *       the run was not the last one in the block.</li>
     *   <li>The offset converting an alignment column into a sequence position counts all
     *       preceding gap columns, including those of discarded runs.</li>
     *   <li>Reaching the end of the file after a blank line ends parsing instead of causing
     *       a {@code NullPointerException}.</li>
     * </ul>
     *
     * @param nHmmerPath             path of the nHMMER output file
     * @param minAcc                 minimum accuracy for a hit to be used
     * @param minScore               minimum score for a hit to be used
     * @param seqTALEs               not used by this method
     * @param correctionList         receives the corrections that were found
     * @param maxConitgousCorrection longest run of adjacent gap columns that is still corrected
     * @param con2                   alphabet used to parse target rows and consensus bases
     * @param type                   {@code "N"} restricts insertions to consensus positions
     *                               up to 864; any other value applies no restriction
     * @throws Exception if the file cannot be read or a line does not have the expected layout
     */
    private static void searchForCorrections(String nHmmerPath, double minAcc, int minScore, HashMap<String, Sequence> seqTALEs, ArrayList<nHMMERCorrection> correctionList, int maxConitgousCorrection, AlphabetContainer con2, String type) throws Exception {
        boolean isNTerminus = "N".equals(type);
        try (BufferedReader BR = new BufferedReader(new FileReader(nHmmerPath))) {
            String line;
            boolean readHeader = true;
            boolean parseResult = false;
            String nHmmerOutSeqname = "";
            while ((line = BR.readLine()) != null) {
                if (readHeader) {
                    if (line.startsWith("Annotation for each hit  (and alignments):")) {
                        readHeader = false;
                    }
                    continue;
                }
                if (line.equals("")) {
                    line = CorrectTALESequences.skipLinesBufferedReader(BR, 1);
                    if (line == null) break;
                }
                if (line.startsWith(">>")) {
                    parseResult = false;
                    nHmmerOutSeqname = line.split(" ")[1];
                    // The statistics row is the third line after the ">>" line.
                    line = CorrectTALESequences.skipLinesBufferedReader(BR, 3);
                    if (line == null) break;
                    String[] lineSplit = line.trim().replaceAll(" +", " ").split(" ");
                    double acc = Double.parseDouble(lineSplit[14]);
                    double score = Double.parseDouble(lineSplit[1]);
                    boolean accepted = acc >= minAcc && score >= (double)minScore;
                    if (!accepted) continue;
                    parseResult = true;
                    // Skip the three lines between the statistics row and the first block.
                    CorrectTALESequences.skipLinesBufferedReader(BR, 3);
                    continue;
                }
                if (!parseResult) continue;
                while (line != null && line.equals("")) {
                    line = CorrectTALESequences.skipLinesBufferedReader(BR, 1);
                }
                if (line == null || line.startsWith("Internal pipeline statistics summary")) break;

                // Consensus row: <name> <start> <alignment> <end>
                String[] consensusFields = line.trim().replaceAll(" +", " ").split(" ");
                String aktResultConsensusSeq = consensusFields[2];
                int aktResultStartPosConsensusSeq = Integer.parseInt(consensusFields[1]);

                // Target row, two lines further down: <name> <start> <alignment> <end>
                line = CorrectTALESequences.skipLinesBufferedReader(BR, 2);
                if (line == null) break;
                String[] targetFields = line.trim().replaceAll(" +", " ").split(" ");
                if (targetFields[1].equals("-")) {
                    // No start coordinate in this block: skip its posterior row as well.
                    CorrectTALESequences.skipLinesBufferedReader(BR, 1);
                    continue;
                }
                int startPosTALE = Integer.parseInt(targetFields[1]);
                int endPosTALE = Integer.parseInt(targetFields[3]);
                int strand = endPosTALE - startPosTALE < 0 ? -1 : 1;
                Sequence subSeqTALEnHmmer = Sequence.create(con2, targetFields[2]);
                nHmmerResult aktResult = new nHmmerResult(subSeqTALEnHmmer, aktResultStartPosConsensusSeq, startPosTALE, endPosTALE, strand);
                String targetRow = aktResult.getSequence().toString();

                line = BR.readLine();
                if (line == null) break;
                String aktResultPosteriorProbability = line.trim().replaceAll(" +", " ");

                if (aktResultPosteriorProbability.contains(".")) {
                    // All gap columns of the target row, in ascending column order.
                    ArrayList<Integer> gapColumns = new ArrayList<Integer>();
                    for (int p = aktResultPosteriorProbability.indexOf("."); p < aktResultPosteriorProbability.length(); ++p) {
                        // NOTE(review): 864 is presumably the last N-terminus consensus
                        // position that should be corrected - confirm.
                        boolean inRange = !isNTerminus || aktResult.getStartPosConsensus() + p <= 864;
                        if (inRange && aktResultPosteriorProbability.charAt(p) == '.' && targetRow.charAt(p) == '-') {
                            gapColumns.add(p);
                        }
                    }
                    int runStart = 0;
                    while (runStart < gapColumns.size()) {
                        int runEnd = runStart;
                        while (runEnd + 1 < gapColumns.size() && gapColumns.get(runEnd + 1).intValue() == gapColumns.get(runEnd).intValue() + 1) {
                            ++runEnd;
                        }
                        int runLength = runEnd - runStart + 1;
                        if (runLength <= maxConitgousCorrection) {
                            for (int g = runStart; g <= runEnd; ++g) {
                                int posInNHMMER = gapColumns.get(g);
                                // g gap columns precede this one, so the column maps to
                                // sequence offset (posInNHMMER - g).
                                int posInInputSeq;
                                int posInInputSeqWithP;
                                if (aktResult.getStrand() == -1) {
                                    posInInputSeq = aktResult.getStartPos() + ((posInNHMMER - g) * aktResult.getStrand() + 1);
                                    posInInputSeqWithP = aktResult.getStartPos() + (posInNHMMER * aktResult.getStrand() + 1);
                                } else {
                                    posInInputSeq = aktResult.getStartPos() + (posInNHMMER - g) * aktResult.getStrand();
                                    posInInputSeqWithP = aktResult.getStartPos() + posInNHMMER * aktResult.getStrand();
                                }
                                String substitution = aktResultConsensusSeq.substring(posInNHMMER, posInNHMMER + 1).toUpperCase();
                                char fromChar;
                                Sequence substition_seq;
                                if (aktResult.getStrand() == -1) {
                                    fromChar = aktResult.getSequence().complement().toString().charAt(posInNHMMER);
                                    substition_seq = Sequence.create(con2, substitution).complement();
                                } else {
                                    fromChar = targetRow.charAt(posInNHMMER);
                                    substition_seq = Sequence.create(con2, substitution);
                                }
                                correctionList.add(new nHMMERCorrection(nHmmerOutSeqname, posInInputSeqWithP, posInInputSeq, fromChar, substition_seq.toString().charAt(0), 'i'));
                            }
                        }
                        runStart = runEnd + 1;
                    }
                }

                // Deletions are only reported for a consensus row of lower-case acgt with
                // exactly one '.'.
                if (!aktResultConsensusSeq.matches("[acgt]*[.][acgt]*")) continue;
                for (int p = aktResultConsensusSeq.indexOf("."); p < aktResultConsensusSeq.length(); ++p) {
                    if (aktResultConsensusSeq.charAt(p) != '.') continue;
                    // NOTE(review): unlike the insertion case, this position is not
                    // adjusted for gap columns that precede it in the target row, nor by
                    // +1 on the reverse strand - confirm that this is intended.
                    int posInInputSeq = aktResult.getStartPos() + p * aktResult.getStrand();
                    char fromChar = aktResult.getStrand() == -1
                            ? aktResult.getSequence().complement().toString().charAt(p)
                            : targetRow.charAt(p);
                    correctionList.add(new nHMMERCorrection(nHmmerOutSeqname, posInInputSeq, posInInputSeq, fromChar, 'd'));
                }
            }
        }
    }

    /**
     * Returns the fixed name of this tool. {@code run} uses it as the producer label of
     * its results and in the result title.
     *
     * @return the string {@code "CorrectTALESequences"}
     */
    @Override
    public String getToolName() {
        return "CorrectTALESequences";
    }

    /**
     * Returns the fixed version string of this tool.
     *
     * @return the string {@code "0.1"}
     */
    @Override
    public String getToolVersion() {
        return "0.1";
    }

    /**
     * Returns the fixed short name of this tool. {@code getToolParameters} uses it as the
     * name of the parameter set.
     *
     * @return the string {@code "correct"}
     */
    @Override
    public String getShortName() {
        return "correct";
    }

    /**
     * Returns a fixed one-sentence description of this tool.
     *
     * @return the string {@code "Corrects TALE sequences."}
     */
    @Override
    public String getDescription() {
        return "Corrects TALE sequences.";
    }

    /**
     * This tool provides no help text.
     *
     * @return always {@code null}
     */
    // NOTE(review): confirm that callers of JstacsTool.getHelpText tolerate null.
    @Override
    public String getHelpText() {
        return null;
    }

    /**
     * This tool declares no default result entries.
     *
     * @return always {@code null}
     */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    /**
     * This tool ships no test cases.
     *
     * @param path ignored
     * @return always {@code null}
     */
    @Override
    public ToolResult[] getTestCases(String path) {
        return null;
    }

    /**
     * Does nothing: this class declares no instance fields that would need resetting.
     */
    @Override
    public void clear() {
    }

    /**
     * This tool lists no literature references.
     *
     * @return always {@code null}
     */
    @Override
    public String[] getReferences() {
        return null;
    }

    /**
     * One proposed single-base correction of a named sequence.
     *
     * <p>{@code position} is the key under which {@code run} sorts and looks up the
     * corrections of a sequence; {@code correctionPosition} is the coordinate at which an
     * insertion is applied. {@code type} is {@code 'i'} for an insertion and {@code 'd'}
     * for a deletion as created in this file.
     */
    private static class nHMMERCorrection {
        private final String seqName;
        private final int position;
        private final int correctionPosition;
        private final char fromChar;
        private final char toChar;
        private final char type;

        /** Creates a correction with an explicit replacement character. */
        public nHMMERCorrection(String seqName, int position, int correctionPosition, char fromChar, char toChar, char type) {
            this.seqName = seqName;
            this.position = position;
            this.correctionPosition = correctionPosition;
            this.fromChar = fromChar;
            this.toChar = toChar;
            this.type = type;
        }

        /** Creates a correction whose replacement character is the gap symbol {@code '-'}. */
        public nHMMERCorrection(String seqName, int position, int correctionPosition, char fromChar, char type) {
            this(seqName, position, correctionPosition, fromChar, '-', type);
        }

        public String getSeqName() {
            return seqName;
        }

        public int getPosition() {
            return position;
        }

        public int getCorrectionPosition() {
            return correctionPosition;
        }

        public char getFromChar() {
            return fromChar;
        }

        public char getToChar() {
            return toChar;
        }

        public char getType() {
            return type;
        }

        /** Formats the correction as {@code "<seqName> (<correctionPosition>): <from> -> <to>"}. */
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(String.valueOf(seqName));
            text.append(" (").append(correctionPosition).append("): ");
            text.append(fromChar).append(" -> ").append(toChar);
            return text.toString();
        }
    }

    /**
     * One alignment block of an nHMMER hit: the aligned target row together with its start
     * position in the consensus and its start/end positions in the target sequence.
     */
    private static class nHmmerResult {
        private final int startPosConsensus;
        private final int startPos;
        private final int endPos;
        private final Sequence seq;
        private final int strand;

        /**
         * @param seq               the aligned target row
         * @param startPosConsensus start position of the block in the consensus
         * @param startPos          start position of the block in the target sequence
         * @param endPos            end position of the block in the target sequence
         * @param strand            ignored; the strand is derived from the two positions
         *                          ({@code -1} if {@code endPos < startPos}, otherwise {@code 1})
         */
        public nHmmerResult(Sequence seq, int startPosConsensus, int startPos, int endPos, int strand) {
            this.seq = seq;
            this.startPosConsensus = startPosConsensus;
            this.startPos = startPos;
            this.endPos = endPos;
            if (endPos - startPos < 0) {
                this.strand = -1;
            } else {
                this.strand = 1;
            }
        }

        public int getStartPosConsensus() {
            return startPosConsensus;
        }

        public int getStartPos() {
            return startPos;
        }

        public int getEndPos() {
            return endPos;
        }

        public Sequence getSequence() {
            return seq;
        }

        public int getStrand() {
            return strand;
        }

        /** Formats the block as {@code "<startPos> <sequence> <endPos>"}. */
        public String toString() {
            return String.valueOf(startPos) + " " + seq + " " + endPos;
        }
    }
}

