package projects.xanthogenomes.tools;

import cern.colt.matrix.impl.AbstractFormatter;
import de.jstacs.DataType;
import de.jstacs.clustering.hierachical.ClusterTree;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterException;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.PlotGeneratorResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.ResultSetResult;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.Pair;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import projects.tals.ScanForTBSCLI;
import projects.tals.TALgetterDiffSM;
import projects.xanthogenomes.BuildFamilies;
import projects.xanthogenomes.FamilyGroupPlotter;
import projects.xanthogenomes.RVDAlphabetContainer;
import projects.xanthogenomes.SplitTALEs;
import projects.xanthogenomes.TALE;
import projects.xanthogenomes.TALEFamilyBuilder;
import projects.xanthogenomes.Tools;

/* loaded from: input_file:projects/xanthogenomes/tools/ClassBuilderTool.class */
public class ClassBuilderTool implements JstacsTool {
    @Override // de.jstacs.tools.JstacsTool
    public ToolParameterSet getToolParameters() {
        try {
            FileParameter fileParameter = new FileParameter("TALE sequences", "The sequences of the TALEs (DNA or protein), or \"TALE DNA parts\" or \"TALE Protein parts\" output of \"TALE Analysis\", or RVD sequences.", "fasta,fa,fas", true);
            fileParameter.setExtendedType("fasta/dna");
            return new ToolParameterSet(getShortName(), fileParameter, new SimpleParameter(DataType.DOUBLE, "Cutoff", "Cutoff value defining the maximum distance of a TALE class", true, new NumberValidator(Double.valueOf(0.0d), Double.valueOf(Double.MAX_VALUE)), Double.valueOf(5.0d)), new SimpleParameter(DataType.DOUBLE, "Significance level", "Cutoff value on the p-value representing alignment significance", true, new NumberValidator(Double.valueOf(0.0d), Double.valueOf(1.0d)), Double.valueOf(0.01d)));
        } catch (ParameterException e) {
            e.printStackTrace();
            return null;
        }
    }

    public static TALE[] readProteinTALEs(FileParameter.FileRepresentation fileRepresentation, Protocol protocol) throws Exception {
        DataSet dataSet;
        String content = fileRepresentation.getContent();
        try {
            TALE[] parseTALEsFromParts = TALEAnalysisTool.parseTALEsFromParts(content, protocol);
            protocol.append("Loaded TALEs from \"TALE Analysis\" parts.\n");
            return parseTALEsFromParts;
        } catch (Exception e) {
            try {
                try {
                    dataSet = new DataSet(DNAAlphabetContainer.SINGLETON, new SparseStringExtractor((Reader) new BufferedReader(new StringReader(content)), '>', "", (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()));
                } catch (Exception e2) {
                    dataSet = new DataSet(Tools.ProteinAlphabetContainer.SINGLETON, new SparseStringExtractor((Reader) new BufferedReader(new StringReader(content)), '>', "", (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()));
                }
                LinkedList linkedList = new LinkedList();
                HashSet hashSet = new HashSet();
                for (int i = 0; i < dataSet.getNumberOfElements(); i++) {
                    String obj = dataSet.getElementAt(i).getSequenceAnnotationByType("unparsed comment line", 0).getResultAt(0).getValue().toString();
                    protocol.append(String.valueOf(obj) + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
                    TALE tale = SplitTALEs.split(obj, dataSet.getElementAt(i), protocol)[1];
                    if (tale != null) {
                        int i2 = 0;
                        String id = tale.getId();
                        while (hashSet.contains(tale.getId())) {
                            if (i2 == 0) {
                                protocol.appendWarning("Duplicate ID " + id + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
                            }
                            i2++;
                            tale.setId(String.valueOf(id) + i2);
                        }
                    }
                    if (tale != null && tale.getNumberOfRepeats() > 0) {
                        linkedList.add(tale);
                    } else if (tale != null) {
                        protocol.appendWarning("Removed putative pseudo gene " + tale.getId() + ", because it has zero repeats.\n");
                    } else {
                        protocol.appendWarning("Removed putative pseudo gene " + obj + ", because it could not be translated.\n");
                    }
                }
                protocol.append("Loaded TALEs from complete sequences.\n");
                return (TALE[]) linkedList.toArray(new TALE[0]);
            } catch (Exception e3) {
                DataSet dataSet2 = new DataSet(RVDAlphabetContainer.SINGLETON, new SparseStringExtractor((Reader) new BufferedReader(new StringReader(content)), '>', "", (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()), "-");
                TALE[] taleArr = new TALE[dataSet2.getNumberOfElements()];
                for (int i3 = 0; i3 < dataSet2.getNumberOfElements(); i3++) {
                    String obj2 = dataSet2.getElementAt(i3).getSequenceAnnotationByType("unparsed comment line", 0).getResultAt(0).getValue().toString();
                    protocol.append(String.valueOf(obj2) + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
                    taleArr[i3] = new TALE(obj2, dataSet2.getElementAt(i3), true, true);
                }
                protocol.appendWarning("Loaded TALEs from RVD sequences. Some properties of TALEs (aberrant repeats, codon mismatches) cannot be displayed due to lacking information.\n");
                return taleArr;
            }
        }
    }

    /* JADX WARN: Type inference failed for: r2v8, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /* JADX WARN: Type inference failed for: r7v9, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    @Override // de.jstacs.tools.JstacsTool
    public ToolResult run(ToolParameterSet toolParameterSet, Protocol protocol, ProgressUpdater progressUpdater, int i) throws Exception {
        progressUpdater.setLast(1.0d);
        progressUpdater.setCurrent(0.0d);
        FileParameter.FileRepresentation fileContents = ((FileParameter) toolParameterSet.getParameterAt(0)).getFileContents();
        double doubleValue = ((Double) toolParameterSet.getParameterAt(1).getValue()).doubleValue();
        double doubleValue2 = ((Double) toolParameterSet.getParameterAt(2).getValue()).doubleValue();
        TALE[] readProteinTALEs = readProteinTALEs(fileContents, protocol);
        progressUpdater.setLast(0.3d);
        protocol.append("Building classes.\n");
        Pair<TALEFamilyBuilder, BuildFamilies.FamilyResult[]> build = BuildFamilies.build(readProteinTALEs, doubleValue, doubleValue2);
        progressUpdater.setCurrent(0.7d);
        BuildFamilies.FamilyResult[] secondElement = build.getSecondElement();
        Arrays.sort(secondElement);
        TALgetterDiffSM tALgetterDiffSM = (TALgetterDiffSM) XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSCLI.class.getClassLoader().getResourceAsStream("projects/xanthogenomes/talfinder_obg2_hyp_bg.xml")), "model");
        ClusterTree<TALEFamilyBuilder.TALEFamily> clusterFamilies = build.getFirstElement().clusterFamilies();
        progressUpdater.setCurrent(0.9d);
        Result[] resultArr = new Result[secondElement.length + 2];
        resultArr[0] = new TextResult("Class builder", "TALE class builder definition", new FileParameter.FileRepresentation("", build.getFirstElement().toXML().toString()), "xml", "TALE Class Builder", TALEFamilyBuilder.class.getName(), true);
        resultArr[1] = new PlotGeneratorResult("Tree of classes", "The tree of class similarities", new FamilyGroupPlotter.FamilyGroupPlotGenerator(clusterFamilies), true);
        protocol.append("Generating reports and plots for " + secondElement.length + " classes...\n");
        for (int i2 = 0; i2 < secondElement.length; i2++) {
            TALEFamilyBuilder.TALEFamily family = secondElement[i2].getFamily();
            protocol.append(String.valueOf(family.getFamilyId()) + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
            int i3 = i2 + 2;
            resultArr[i3] = new ResultSetResult("Class " + family.getFamilyId(), "Collection of results for class " + family.getFamilyId(), null, new ResultSet((Result[][]) new Result[]{new Result[]{new TextResult("Class report for " + family.getFamilyId(), "Report for class " + family.getFamilyId(), new FileParameter.FileRepresentation("", secondElement[i2].toString(tALgetterDiffSM, build.getFirstElement())), "txt", "TALE Class Builder", null, false), new PlotGeneratorResult("Class tree for " + family.getFamilyId(), "Plot of the tree of the TALEs in this class", family, true)}}));
        }
        return new ToolResult("Result of " + getToolName(), "", null, new ResultSet((Result[][]) new Result[]{resultArr}), toolParameterSet, getToolName(), new Date(System.currentTimeMillis()));
    }

    @Override // de.jstacs.tools.JstacsTool
    public String getToolName() {
        return "TALE Class Builder";
    }

    @Override // de.jstacs.tools.JstacsTool
    public String getShortName() {
        return "build";
    }

    @Override // de.jstacs.tools.JstacsTool
    public String getDescription() {
        return "Creates classes from a set of input TALEs";
    }

    @Override // de.jstacs.tools.JstacsTool
    public String getHelpText() {
        try {
            return FileManager.readInputStream(ClassBuilderTool.class.getClassLoader().getResourceAsStream("projects/xanthogenomes/tools/ClassBuilderTool.txt")).toString();
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    @Override // de.jstacs.tools.JstacsTool
    public String getToolVersion() {
        return "1.4.1";
    }

    @Override // de.jstacs.tools.JstacsTool
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    @Override // de.jstacs.tools.JstacsTool
    public ToolResult[] getTestCases(String str) {
        return null;
    }

    @Override // de.jstacs.tools.JstacsTool
    public void clear() {
    }

    @Override // de.jstacs.tools.JstacsTool
    public String[] getReferences() {
        return new String[]{"@article{grau16annotale,\n\ttitle = {{AnnoTALE}: bioinformatics tools for identification, annotation, and nomenclature of {TALEs} from \\emph{Xanthomonas} genomic sequences},\n\tauthor = {Grau, Jan and Reschke, Maik and Erkes, Annett and Streubel, Jana and Morgan, Richard D. and Wilson, Geoffrey G. and Koebnik, Ralf and Boch, Jens},\n\tjournal = {Scientific Reports},\n\tyear = {2016},\n\tvolume = {6},\n\tpages = {21077},\n\tdoi = {10.1038/srep21077}\n\t}\n"};
    }
}
