package projects.xanthogenomes.tools;

import de.jstacs.DataType;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.EmptyDataSetException;
import de.jstacs.data.WrongAlphabetException;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.bioJava.BioJavaAdapter;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.Pair;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.dom.svg.SVGPathSegConstants;
import org.apache.batik.util.SVGConstants;
import org.apache.batik.util.XMLConstants;
import org.apache.fop.pdf.PDFGState;
import org.biojava.bio.SimpleAnnotation;
import org.biojava.bio.seq.SequenceIterator;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleNamespace;
import org.biojavax.SimpleNote;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichLocation;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.RichSequenceIterator;
import org.biojavax.bio.seq.SimplePosition;
import org.biojavax.bio.seq.SimpleRichFeature;
import org.biojavax.bio.seq.SimpleRichLocation;
import projects.xanthogenomes.NHMMer;
import projects.xanthogenomes.Tools;

/* loaded from: input_file:projects/xanthogenomes/tools/TALEPredictionTool.class */
/**
 * Jstacs tool that scans an input genome (FastA or Genbank) with the bundled
 * repeat/start/end HMMs via {@link NHMMer} and reports the predicted TALE genes
 * as GFF, as an annotated Genbank file and as DNA/protein FastA files.
 */
public class TALEPredictionTool implements JstacsTool {

    /** Display name of the tool; also used as the producer of every result. */
    private static final String TOOL_NAME = "TALE Prediction";

    /** Class-path directory holding the bundled HMM files. */
    private static final String MODEL_DIR = "projects/xanthogenomes/data/";

    /** Class-path location of the help text. */
    private static final String HELP_RESOURCE = "projects/xanthogenomes/tools/TALEPredictionTool.txt";

    /** Matches every character that {@link #preprocess(DataSet)} substitutes. */
    private static final Pattern NON_ACGT = Pattern.compile("[^ACGTacgt]");

    /*
     * Columns of one row of the array returned by NHMMer.run, named after the
     * way this class uses them (start columns are incremented by one before
     * being written to GFF/Genbank, end columns are written as-is).
     */
    /** Index of the sequence within the data set. */
    private static final int SEQ = 0;
    /** Start of the region reported as CDS. */
    private static final int CDS_START = 1;
    /** End of the region reported as CDS. */
    private static final int CDS_END = 2;
    /** Strand indicator; negative values are reported as the minus strand. */
    private static final int STRAND = 3;
    /** Start of the region reported as mRNA / complete match. */
    private static final int MATCH_START = 4;
    /** End of the region reported as mRNA / complete match. */
    private static final int MATCH_END = 5;
    /** Pseudo gene flag; 1 marks a putative pseudo gene. */
    private static final int PSEUDO = 6;

    /**
     * Declares the two parameters of the tool: the genome file (required) and
     * the strain name (optional).
     *
     * @return the parameter set, or {@code null} if it could not be created
     */
    @Override
    public ToolParameterSet getToolParameters() {
        try {
            FileParameter genome = new FileParameter("Genome", "The input Xanthomonas genome in FastA or Genbank format", "fasta,fa,fas,gb,gbk,genbank", true);
            SimpleParameter strain = new SimpleParameter(DataType.STRING, "Strain", "The name of the strain, will be used for annotated TALEs", false);
            return new ToolParameterSet(getShortName(), genome, strain);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Loads the genome, predicts TALEs and assembles all result files.
     *
     * @param toolParameterSet parameters as created by {@link #getToolParameters()}
     *                         (index 0: genome file, index 1: strain name)
     * @param protocol         receives progress messages and warnings
     * @param progressUpdater  reset here and then handed to {@link NHMMer}
     * @param threads          not used by this implementation
     * @return GFF, Genbank, DNA FastA and protein FastA results, plus a fifth
     *         FastA result if at least one hit is flagged as pseudo gene
     * @throws Exception if the input cannot be parsed (message
     *                   "Input not in expected format", with the parser failure
     *                   as cause) or if any later step fails
     */
    @Override
    public ToolResult run(ToolParameterSet toolParameterSet, Protocol protocol, ProgressUpdater progressUpdater, int threads) throws Exception {
        progressUpdater.setLast(1.0d);
        progressUpdater.setCurrent(0.0d);

        // A non-blank strain name decorates result titles and prefixes TALE ids.
        String titleSuffix = "";
        String idPrefix = "";
        SimpleParameter strainParameter = (SimpleParameter) toolParameterSet.getParameterAt(1);
        if (strainParameter.isSet()) {
            String strain = (String) strainParameter.getValue();
            if (strain != null && strain.trim().length() > 0) {
                titleSuffix = " (" + strain + ")";
                idPrefix = strain + "-";
            }
        }

        FileParameter.FileRepresentation genomeFile = ((FileParameter) toolParameterSet.getParameterAt(0)).getFileContents();
        String content = genomeFile.getContent();
        SimpleNamespace namespace = new SimpleNamespace("biojava");

        protocol.append("Loading input genome...\n");
        Pair<DataSet, RichSequence[]> genome = loadGenome(content, namespace, protocol);
        RichSequence[] richSequences = genome.getSecondElement();

        Pair<DataSet, ArrayList<Integer>[]> cleaned = preprocess(genome.getFirstElement());
        DataSet sequences = cleaned.getFirstElement();
        ArrayList<Integer>[] replacedPositions = cleaned.getSecondElement();

        protocol.append("Scanning genome for TALEs...\n");
        int[][] hits;
        try (InputStreamReader repeats = openModel("repeats.hmm");
                InputStreamReader starts = openModel("starts.hmm");
                InputStreamReader ends = openModel("ends.hmm")) {
            hits = NHMMer.run(repeats, starts, ends, sequences, progressUpdater);
        }
        protocol.append("...finished.\n\n");

        warnAboutReplacedSymbols(hits, replacedPositions, idPrefix, protocol);

        protocol.append("Writing GFF output.\n");
        TextResult gffResult = new TextResult("GFF: TALE predictions" + titleSuffix, "TALE predictions in GFF format", new FileParameter.FileRepresentation("", buildGff(sequences, hits, idPrefix)), "gff3", TOOL_NAME, null, true);

        protocol.append("Writing Genbank output.\n");
        addGenbankFeatures(richSequences, hits, idPrefix);
        ByteArrayOutputStream genbankBytes = new ByteArrayOutputStream();
        for (RichSequence richSequence : richSequences) {
            RichSequence.IOTools.writeGenbank(genbankBytes, richSequence, namespace);
        }
        TextResult genbankResult = new TextResult("Genbank: TALE predictions" + titleSuffix, "Annotated TALEs in Genbank format", new FileParameter.FileRepresentation("", genbankBytes.toString("UTF-8")), "gb", TOOL_NAME, null, true);

        protocol.append("Writing FastA outputs.\n");
        StringBuilder dnaFasta = new StringBuilder();
        StringBuilder proteinFasta = new StringBuilder();
        StringBuilder pseudoFasta = new StringBuilder();
        appendFastaRecords(sequences, hits, idPrefix, dnaFasta, proteinFasta, pseudoFasta);
        TextResult dnaResult = new TextResult("TALE DNA sequences" + titleSuffix, "The DNA sequences of the TALE CDS", new FileParameter.FileRepresentation("", dnaFasta.toString()), "fasta", TOOL_NAME, "fasta/dna", true);
        TextResult proteinResult = new TextResult("TALE protein sequences" + titleSuffix, "The protein sequences of the TALE CDS", new FileParameter.FileRepresentation("", proteinFasta.toString()), "fasta", TOOL_NAME, "fasta/as", true);

        // The pseudo gene file is only part of the result if it has content.
        Result[] row;
        if (pseudoFasta.length() > 0) {
            TextResult pseudoResult = new TextResult("TALE pseudo gene matches" + titleSuffix, "The complete matching sequences of the TALE pseudo genes as DNA and translated in all three reading frames", new FileParameter.FileRepresentation("", pseudoFasta.toString()), "fasta", TOOL_NAME, "fasta/as", true);
            row = new Result[]{gffResult, genbankResult, dnaResult, proteinResult, pseudoResult};
        } else {
            row = new Result[]{gffResult, genbankResult, dnaResult, proteinResult};
        }

        String fileName = genomeFile.getFilename();
        String comment = getToolName() + (fileName != null ? " on " + fileName : "");
        return new ToolResult("Result of " + getToolName() + titleSuffix, comment, null, new ResultSet(new Result[][]{row}), toolParameterSet, getToolName(), new Date());
    }

    /**
     * Parses the genome either as FastA (content starts with '>') or as Genbank.
     *
     * @return the sequences as Jstacs data set and, in the same order, as
     *         BioJava rich sequences
     * @throws Exception "Input not in expected format" (cause attached) if
     *                   parsing fails; the failure is also written to the protocol
     */
    private static Pair<DataSet, RichSequence[]> loadGenome(String content, SimpleNamespace namespace, Protocol protocol) throws Exception {
        // DNA alphabet including the IUPAC ambiguity codes, case-insensitive.
        AlphabetContainer alphabet = new AlphabetContainer(new DiscreteAlphabet(true, "A", "C", "G", "T", "N", "W", "S", "M", "K", "R", "Y", "B", "D", "H", "V"));
        boolean isFastA = content.trim().startsWith(">");
        try {
            return isFastA ? loadFastA(content, alphabet) : loadGenbank(content, alphabet, namespace);
        } catch (Exception e) {
            protocol.appendWarning("... loading failed.\n\n");
            protocol.appendThrowable(e);
            // Keep the original failure as cause so callers can diagnose it.
            throw new Exception("Input not in expected format", e);
        }
    }

    /** Reads FastA content and derives the BioJava view from the Jstacs data set. */
    private static Pair<DataSet, RichSequence[]> loadFastA(String content, AlphabetContainer alphabet) throws Exception {
        Reader reader = new BufferedReader(new StringReader(content));
        SequenceAnnotationParser parser = new SimpleSequenceAnnotationParser();
        DataSet data = new DataSet(alphabet, new SparseStringExtractor(reader, '>', "", parser));
        SequenceIterator iterator = BioJavaAdapter.dataSetToSequenceIterator(data, false, true);
        RichSequence[] rich = new RichSequence[data.getNumberOfElements()];
        for (int s = 0; s < rich.length; s++) {
            rich[s] = (RichSequence) iterator.nextSequence();
        }
        return new Pair<DataSet, RichSequence[]>(data, rich);
    }

    /**
     * Reads Genbank content. The text is parsed twice because the first
     * iterator is consumed when it is converted into the Jstacs data set.
     */
    private static Pair<DataSet, RichSequence[]> loadGenbank(String content, AlphabetContainer alphabet, SimpleNamespace namespace) throws Exception {
        DataSet data;
        try (BufferedReader reader = new BufferedReader(new StringReader(content))) {
            data = BioJavaAdapter.sequenceIteratorToDataSet(RichSequence.IOTools.readGenbankDNA(reader, namespace), null, alphabet);
        }
        List<RichSequence> records = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new StringReader(content))) {
            RichSequenceIterator iterator = RichSequence.IOTools.readGenbankDNA(reader, namespace);
            while (iterator.hasNext()) {
                records.add(iterator.nextRichSequence());
            }
        }
        return new Pair<DataSet, RichSequence[]>(data, records.toArray(new RichSequence[0]));
    }

    /**
     * Opens one of the bundled HMM files.
     *
     * @throws IOException if the resource is not on the class path
     */
    private static InputStreamReader openModel(String fileName) throws IOException {
        String path = MODEL_DIR + fileName;
        InputStream stream = TALEPredictionTool.class.getClassLoader().getResourceAsStream(path);
        if (stream == null) {
            throw new IOException("Bundled resource not found on class path: " + path);
        }
        // Fixed charset so that reading does not depend on the platform default.
        return new InputStreamReader(stream, StandardCharsets.UTF_8);
    }

    /** Emits one warning per hit whose CDS range covers a substituted position. */
    private static void warnAboutReplacedSymbols(int[][] hits, ArrayList<Integer>[] replacedPositions, String idPrefix, Protocol protocol) {
        for (int t = 0; t < hits.length; t++) {
            int from = hits[t][CDS_START];
            int to = hits[t][CDS_END];
            boolean affected = false;
            for (Integer position : replacedPositions[hits[t][SEQ]]) {
                if (position >= from && position <= to) {
                    affected = true;
                    break;
                }
            }
            if (affected) {
                protocol.appendWarning(idPrefix + "tempTALE" + (t + 1) + " contained \"N\"s in predicted CDS, which have been replaced. Please use with care.\n");
            }
        }
    }

    /** Builds the GFF text: one mRNA line and one CDS line per hit. */
    private static String buildGff(DataSet sequences, int[][] hits, String idPrefix) {
        StringBuilder gff = new StringBuilder();
        for (int t = 0; t < hits.length; t++) {
            int[] hit = hits[t];
            // May be null, in which case the literal text "null" is written.
            String seqName = sequenceName(sequences.getElementAt(hit[SEQ]));
            String strand = hit[STRAND] < 0 ? "-" : "+";
            String id = idPrefix + "tempTALE" + (t + 1);
            // Any non-zero flag adds the note here (the other outputs test for 1).
            String note = hit[PSEUDO] == 0 ? "" : "; Note=putative pseudo gene";
            gff.append(seqName + "\tTALE-prediction\tmRNA\t" + (hit[MATCH_START] + 1) + "\t" + hit[MATCH_END] + "\t.\t" + strand + "\t.\tId=" + id + note + "\n");
            gff.append(seqName + "\tTALE-prediction\tCDS\t" + (hit[CDS_START] + 1) + "\t" + hit[CDS_END] + "\t.\t" + strand + "\t.\tParent=" + id + "\n");
        }
        return gff.toString();
    }

    /**
     * Determines the name written to the first GFF column: the trimmed FastA
     * comment line if present, otherwise the "Name" entry of the BioJava
     * annotation, otherwise {@code null}.
     */
    private static String sequenceName(Sequence sequence) {
        SequenceAnnotation comment = sequence.getSequenceAnnotationByType("unparsed comment line", 0);
        if (comment != null) {
            return comment.getResultAt(0).getValue().toString().trim();
        }
        SequenceAnnotation richAnnotation = sequence.getSequenceAnnotationByTypeAndIdentifier("BioJava RichSequence Annotation", BioJavaAdapter.ANNOTATION_ID);
        if (richAnnotation != null) {
            Result name = richAnnotation.getResultForName("Name");
            if (name != null) {
                return name.getValue().toString();
            }
        }
        return null;
    }

    /** Adds a CDS and an mRNA feature for every hit to its rich sequence. */
    private static void addGenbankFeatures(RichSequence[] richSequences, int[][] hits, String idPrefix) throws Exception {
        for (int t = 0; t < hits.length; t++) {
            int[] hit = hits[t];
            RichSequence target = richSequences[hit[SEQ]];
            RichLocation.Strand strand = hit[STRAND] < 0 ? RichLocation.Strand.NEGATIVE_STRAND : RichLocation.Strand.POSITIVE_STRAND;
            String id = idPrefix + "tempTALE" + (t + 1);

            RichFeature.Template template = new RichFeature.Template();
            template.location = new SimpleRichLocation(new SimplePosition(hit[CDS_START] + 1), new SimplePosition(hit[CDS_END]), t, strand);
            template.source = "TALE-prediction";
            template.type = "CDS";
            template.annotation = new SimpleAnnotation();
            template.featureRelationshipSet = new HashSet<>();
            template.rankedCrossRefs = new HashSet<>();
            SimpleRichFeature cds = new SimpleRichFeature(target, template);
            cds.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene"), id, 1));
            target.getFeatureSet().add(cds);

            // The same template is reused for the mRNA feature; only type and location change.
            template.type = "mRNA";
            template.location = new SimpleRichLocation(new SimplePosition(hit[MATCH_START] + 1), new SimplePosition(hit[MATCH_END]), t, strand);
            SimpleRichFeature mrna = new SimpleRichFeature(target, template);
            mrna.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene"), id, 1));
            if (hit[PSEUDO] == 1) {
                mrna.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("note"), "putative pseudo gene", 2));
            }
            target.getFeatureSet().add(mrna);
        }
    }

    /**
     * Appends the FastA records of all hits to the three buffers: CDS as DNA,
     * CDS translated in frame 0 and, for hits flagged as pseudo gene, the
     * complete match as DNA plus its translation in frames 0 to 2.
     */
    private static void appendFastaRecords(DataSet sequences, int[][] hits, String idPrefix, StringBuilder dnaFasta, StringBuilder proteinFasta, StringBuilder pseudoFasta) throws Exception {
        for (int t = 0; t < hits.length; t++) {
            int[] hit = hits[t];
            boolean reverse = hit[STRAND] < 0;
            boolean pseudo = hit[PSEUDO] == 1;
            String header = ">" + idPrefix + "tempTALE" + (t + 1) + (pseudo ? " (Pseudo)" : "");

            Sequence cds = sequences.getElementAt(hit[SEQ]).getSubSequence(hit[CDS_START], hit[CDS_END] - hit[CDS_START]);
            if (reverse) {
                cds = cds.reverseComplement();
            }
            String cdsRange = "[" + hit[CDS_START] + "-" + hit[CDS_END] + ":" + hit[STRAND] + "]";
            dnaFasta.append(header + " " + cdsRange + "\n" + cds + "\n");
            proteinFasta.append(header + " " + cdsRange + "\n" + Tools.Translator.DEFAULT.translate(cds, 0) + "\n");

            if (pseudo) {
                Sequence match = sequences.getElementAt(hit[SEQ]).getSubSequence(hit[MATCH_START], hit[MATCH_END] - hit[MATCH_START]);
                if (reverse) {
                    match = match.reverseComplement();
                }
                String matchRange = "[" + hit[MATCH_START] + "-" + hit[MATCH_END] + ":" + hit[STRAND] + "]";
                pseudoFasta.append(header + " " + matchRange + "\n" + match + "\n");
                for (int frame = 0; frame < 3; frame++) {
                    pseudoFasta.append(header + " frame: " + frame + " " + matchRange + "\n" + Tools.Translator.DEFAULT.translate(match, frame) + "\n");
                }
            }
        }
    }

    /**
     * Replaces every character other than A, C, G, T (either case) by "A" and
     * re-creates each sequence over {@link DNAAlphabetContainer#SINGLETON},
     * keeping its annotations.
     *
     * @param dataSet the sequences as loaded from the input
     * @return the converted data set and, per sequence, the string positions of
     *         all replaced characters in ascending order
     */
    private Pair<DataSet, ArrayList<Integer>[]> preprocess(DataSet dataSet) throws IllegalArgumentException, WrongAlphabetException, EmptyDataSetException {
        Sequence[] sequences = dataSet.getAllElements();
        @SuppressWarnings("unchecked") // generic array creation; only ArrayList<Integer> instances are stored
        ArrayList<Integer>[] replaced = new ArrayList[dataSet.getNumberOfElements()];
        for (int s = 0; s < sequences.length; s++) {
            ArrayList<Integer> positions = new ArrayList<>();
            replaced[s] = positions;
            SequenceAnnotation[] annotation = sequences[s].getAnnotation();
            Matcher matcher = NON_ACGT.matcher(sequences[s].toString());
            while (matcher.find()) {
                positions.add(matcher.start());
            }
            // replaceAll resets the matcher, so it starts again from the beginning.
            sequences[s] = Sequence.create(DNAAlphabetContainer.SINGLETON, annotation, matcher.replaceAll("A"), "");
        }
        return new Pair<DataSet, ArrayList<Integer>[]>(new DataSet(dataSet.getAnnotation(), sequences), replaced);
    }

    /** @return the display name of the tool */
    @Override
    public String getToolName() {
        return TOOL_NAME;
    }

    /** @return the short name, also used as name of the parameter set */
    @Override
    public String getShortName() {
        return "predict";
    }

    /** @return a one-line description of the tool */
    @Override
    public String getDescription() {
        return "Predicts TALE in a genome";
    }

    /**
     * Reads the help text bundled on the class path.
     *
     * @return the help text, or an empty string if the resource is missing or
     *         cannot be read
     */
    @Override
    public String getHelpText() {
        try (InputStream stream = TALEPredictionTool.class.getClassLoader().getResourceAsStream(HELP_RESOURCE)) {
            if (stream == null) {
                return "";
            }
            return FileManager.readInputStream(stream).toString();
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /** @return the version string of the tool */
    @Override
    public String getToolVersion() {
        return "1.1";
    }

    /** @return always {@code null}; no default result infos are declared */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    /** @return always {@code null}; no test cases are provided */
    @Override
    public ToolResult[] getTestCases(String path) {
        return null;
    }

    /** Does nothing; the tool keeps no state between runs. */
    @Override
    public void clear() {
    }

    /** @return always {@code null}; no references are provided */
    @Override
    public String[] getReferences() {
        return null;
    }
}
