package projects.motifComp;

import de.jstacs.DataType;
import de.jstacs.clustering.distances.DeBruijnMotifComparison;
import de.jstacs.clustering.hierachical.ClusterTree;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.io.FileManager;
import de.jstacs.io.NonParsableException;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.ListResult;
import de.jstacs.results.PlotGeneratorResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.ResultSetResult;
import de.jstacs.sequenceScores.statisticalModels.StatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.PFMWrapperTrainSM;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolResult;
import de.jstacs.tools.ui.galaxy.Galaxy;
import de.jstacs.tools.ui.galaxy.MultilineSimpleParameter;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.PFMComparator;
import de.jstacs.utils.Pair;
import de.jstacs.utils.SeqLogoPlotter;
import de.jstacs.utils.ToolBox;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import org.apache.batik.svggen.SVGSyntax;
import org.apache.batik.util.XMLConstants;
import projects.motifComp.MotifTreePlotter;

/* loaded from: input_file:projects/dimont/DimontGenomeScan.jar:projects/motifComp/FindPWMsAndClusters.class */
public class FindPWMsAndClusters implements JstacsTool {
    /**
     * Second argument passed to {@code DeBruijnMotifComparison.getProfilesForMotif} when the
     * score profile of a query PWM/PFM is computed in {@code getProfiles}.
     * NOTE(review): presumably the order k of the de Bruijn sequence (profile length 4^k) - confirm.
     * Never reassigned, hence declared as a constant.
     */
    private static final int n = 8;

    /**
     * Cluster trees of the motif data base, parsed from the tag {@code "trees"} of the bundled
     * resource {@code projects/motifComp/data/clusters.xml} when an instance is created.
     */
    private final ClusterTree<StatisticalModel>[] trees = (ClusterTree[]) XMLParser.extractObjectForTags(FileManager.readInputStream(FindPWMsAndClusters.class.getClassLoader().getResourceAsStream("projects/motifComp/data/clusters.xml")), "trees");

    /**
     * Rows of the bundled table {@code projects/motifComp/data/encode_ids.txt}, keyed by the
     * content of their first tab-separated column; filled once by the constructor.
     */
    private final HashMap<String, String[]> expMap;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:projects/dimont/DimontGenomeScan.jar:projects/motifComp/FindPWMsAndClusters$MotifMatch.class */
    /**
     * Couples one cluster tree with the motifs of that cluster that matched a query.
     * The matches are kept as a private copy of the array handed to the constructor,
     * ordered by {@link Arrays#sort(Object[])}, i.e. by the natural ordering of
     * {@code ComparableElement}.
     */
    public static class MotifMatch {
        private ClusterTree<StatisticalModel> tree;
        private ComparableElement<String, Double>[] matches;

        /**
         * @param clusterTree the cluster tree the matches belong to
         * @param comparableElementArr the matching motifs with their similarity; the array itself is not modified
         */
        public MotifMatch(ClusterTree<StatisticalModel> clusterTree, ComparableElement<String, Double>[] comparableElementArr) {
            // Work on a copy so that sorting does not reorder the caller's array.
            ComparableElement<String, Double>[] ordered = Arrays.copyOf(comparableElementArr, comparableElementArr.length);
            Arrays.sort(ordered);
            this.matches = ordered;
            this.tree = clusterTree;
        }
    }

    /**
     * Command line entry point: wraps a new instance of this tool into a {@code Galaxy}
     * front end and runs it with the given arguments.
     *
     * @param strArr the command line arguments, passed on unchanged to {@code Galaxy.run}
     * @throws Exception if the tool cannot be created or the Galaxy run fails
     */
    public static void main(String[] strArr) throws Exception {
        FindPWMsAndClusters tool = new FindPWMsAndClusters();
        Galaxy galaxy = new Galaxy(" -Xms512M -Xmx2G", false, tool);
        galaxy.run(strArr);
    }

    /**
     * Creates the tool and loads the bundled table
     * {@code projects/motifComp/data/encode_ids.txt}: every line except the first one is split
     * at tabs and stored under the content of its first column.
     *
     * @throws IOException if a bundled resource cannot be read
     * @throws NonParsableException if the bundled cluster trees cannot be parsed
     */
    public FindPWMsAndClusters() throws IOException, NonParsableException {
        ClassLoader loader = FindPWMsAndClusters.class.getClassLoader();
        String table = FileManager.readInputStream(loader.getResourceAsStream("projects/motifComp/data/encode_ids.txt")).toString();
        HashMap<String, String[]> rowsById = new HashMap<>();
        String[] rows = table.split("\n");
        // Row 0 is skipped, exactly as before (presumably the column header - confirm).
        int row = 1;
        while (row < rows.length) {
            String[] columns = rows[row].split("\t");
            rowsById.put(columns[0], columns);
            row++;
        }
        this.expMap = rowsById;
    }

    /**
     * Builds a {@code PFMWrapperTrainSM} over the DNA alphabet from the PFM that
     * {@code PFMComparator.getPFM} derives from the given data set.
     *
     * @param dataSet the data set the PFM is computed from
     * @return the wrapped model, created with the fixed last constructor argument 4.0
     * @throws CloneNotSupportedException if thrown while creating the model
     */
    private StatisticalModel getModel(DataSet dataSet) throws CloneNotSupportedException {
        double[][] pfm = PFMComparator.getPFM(dataSet);
        StatisticalModel model = new PFMWrapperTrainSM(DNAAlphabetContainer.SINGLETON, "motif estimated from data set", pfm, 4.0d);
        return model;
    }

    /* JADX WARN: Type inference failed for: r2v1, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /* JADX WARN: Type inference failed for: r8v1, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /**
     * Searches the motif data base for every query profile and bundles the matches per query.
     *
     * <p>For query {@code i} the matches returned by
     * {@link #find(double, String, double[])} are wrapped into one {@code ResultSetResult}
     * named {@code "Matches for <name>"}. If a PWM is available for the query, a sequence logo
     * is put in front of its matches.
     *
     * @param d the similarity threshold handed on to the per-query search
     * @param pairArr the queries as pairs of name and score profile
     * @param linkedList the PWMs of the queries by position; an entry may be {@code null} and the
     *        list may be shorter than {@code pairArr} (e.g. for score-profile input, which has
     *        no PWMs), in which case no logo is added
     * @return a result set with one entry per query
     * @throws Exception if the search for one of the queries fails
     */
    private ResultSet find(double d, Pair<String, double[]>[] pairArr, LinkedList<double[][]> linkedList) throws Exception {
        // One snapshot instead of LinkedList.get(i) per iteration (which walks the list each time).
        double[][][] pwms = linkedList.toArray(new double[0][][]);
        LinkedList<Result> perQuery = new LinkedList<>();
        for (int i = 0; i < pairArr.length; i++) {
            String name = pairArr[i].getFirstElement();
            LinkedList<Result> matches = find(d, name, pairArr[i].getSecondElement());
            // Guard the index: previously a second query without PWM entry raised an
            // IndexOutOfBoundsException here.
            double[][] pwm = i < pwms.length ? pwms[i] : null;
            if (pwm != null) {
                matches.addFirst(new PlotGeneratorResult("Motif", "Sequence logo of " + name, new SeqLogoPlotter.SeqLogoPlotGenerator(pwm, 200), true));
            }
            ResultSet matchSet = new ResultSet(new Result[][]{matches.toArray(new Result[0])});
            perQuery.add(new ResultSetResult("Matches for " + name, "", null, matchSet));
        }
        return new ResultSet(new Result[][]{perQuery.toArray(new Result[0])});
    }

    /* JADX WARN: Type inference failed for: r2v10, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /* JADX WARN: Type inference failed for: r7v3, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /**
     * Searches all cluster trees for motifs whose score profile is similar to a query profile.
     *
     * <p>For every element of every cluster tree two profiles are obtained from
     * {@code DeBruijnMotifComparison.getProfilesForMotif} (third argument {@code false} and
     * {@code true}; presumably the two strand orientations - confirm), each is compared to the
     * query, and the larger similarity is kept. Elements with a similarity above {@code d} are
     * reported. Clusters with at least one such element are ranked by the mean similarity over
     * all their elements, best cluster first; within a cluster the best element comes first.
     *
     * @param d the similarity threshold; only elements with a strictly larger similarity match
     * @param str the name of the query motif (only used in error messages)
     * @param dArr the score profile of the query; its length must be a power of 4
     * @return one {@code ResultSetResult} per matching cluster, containing the plot of the
     *         cluster tree and the list of matching motifs with their annotation
     * @throws IllegalArgumentException if the length of {@code dArr} is not a power of 4
     * @throws Exception if computing or comparing profiles fails
     */
    @SuppressWarnings("unchecked") // generic arrays of ComparableElement can only be created via raw arrays
    private LinkedList<Result> find(double d, String str, double[] dArr) throws Exception {
        double profileLength = dArr.length;
        int k = (int) Math.round(Math.log(profileLength) / Math.log(4.0d));
        if (Math.pow(4.0d, k) != profileLength) {
            throw new IllegalArgumentException("Score profile of motif \"" + str + "\" has length " + dArr.length + ", which is not a power of 4.");
        }

        LinkedList<ComparableElement<MotifMatch, Double>> clusterHits = new LinkedList<>();
        for (ClusterTree<StatisticalModel> tree : this.trees) {
            StatisticalModel[] members = tree.getClusterElements();
            double similaritySum = 0.0d;
            LinkedList<ComparableElement<String, Double>> memberHits = new LinkedList<>();
            for (StatisticalModel member : members) {
                double first = DeBruijnMotifComparison.compare(DeBruijnMotifComparison.getProfilesForMotif(member, k, false, false)[0], dArr, member.getLength()).getSecondElement().doubleValue();
                double second = DeBruijnMotifComparison.compare(DeBruijnMotifComparison.getProfilesForMotif(member, k, true, false)[0], dArr, member.getLength()).getSecondElement().doubleValue();
                double best = Math.max(first, second);
                if (best > d) {
                    memberHits.add(new ComparableElement<String, Double>(member.toString(), Double.valueOf(best)));
                }
                similaritySum += best;
            }
            // The cluster is ranked by the mean over ALL members, not only the matching ones.
            double meanSimilarity = similaritySum / members.length;
            if (!memberHits.isEmpty()) {
                MotifMatch match = new MotifMatch(tree, memberHits.toArray(new ComparableElement[0]));
                clusterHits.add(new ComparableElement<MotifMatch, Double>(match, Double.valueOf(meanSimilarity)));
            }
        }

        ComparableElement<MotifMatch, Double>[] ranked = clusterHits.toArray(new ComparableElement[0]);
        Arrays.sort(ranked);

        // Created once instead of once per reported motif.
        java.text.NumberFormat similarityFormat = DecimalFormat.getInstance();
        LinkedList<Result> results = new LinkedList<>();
        // Arrays.sort orders ascending, so walk backwards to report the best cluster first.
        for (int c = ranked.length - 1; c >= 0; c--) {
            MotifMatch match = ranked[c].getElement();
            PlotGeneratorResult treePlot = new PlotGeneratorResult("Cluster tree", "", new MotifTreePlotter.MotifTreePlotGenerator(match.tree, 100, k), true);
            LinkedList<ResultSet> rows = new LinkedList<>();
            for (int m = match.matches.length - 1; m >= 0; m--) {
                String motifName = match.matches[m].getElement();
                String similarity = similarityFormat.format(match.matches[m].getWeight().doubleValue());
                String[] experiment = lookupExperiment(motifName);
                boolean hasLink = experiment != null && experiment.length > 5;
                String link = hasLink ? "<a href=\"" + experiment[5] + "\" target=\"_blank\">" + experiment[5] + "</a>" : "n/a";
                rows.add(new ResultSet(new Result[][]{new Result[]{
                        new CategoricalResult("Motif", "", motifName),
                        new CategoricalResult("Similarity", "", similarity),
                        new CategoricalResult("Type", "", metadataColumn(experiment, 1)),
                        new CategoricalResult("Target", "", metadataColumn(experiment, 2)),
                        new CategoricalResult("Description", "", metadataColumn(experiment, 3)),
                        new CategoricalResult("Lab", "", metadataColumn(experiment, 4)),
                        new CategoricalResult("Link", "", link)}}));
            }
            ListResult matchList = new ListResult("List of motif matches", "", null, rows.toArray(new ResultSet[0]));
            results.add(new ResultSetResult("Matching cluster " + (ranked.length - c), "", null, new ResultSet(new Result[][]{new Result[]{treePlot, matchList}})));
        }
        return results;
    }

    /**
     * Looks up the annotation row of a motif. The key is the part of the motif name between the
     * first opening parenthesis and the following "-".
     *
     * @return the row from the annotation table, or {@code null} if the name contains no "-"
     *         after the parenthesis or no row is registered under the key
     */
    private String[] lookupExperiment(String motifName) {
        String afterParenthesis = motifName.substring(motifName.indexOf(SVGSyntax.OPEN_PARENTHESIS) + 1);
        int dash = afterParenthesis.indexOf("-");
        return dash < 0 ? null : this.expMap.get(afterParenthesis.substring(0, dash));
    }

    /** Returns the given column of an annotation row, or "n/a" if the row or the column is missing. */
    private static String metadataColumn(String[] row, int column) {
        return row != null && column < row.length ? row[column] : "n/a";
    }

    /**
     * Declares the parameters of the tool: a selection of the motif source (PWMs/PFMs given as
     * text, a file of aligned binding sites, or a file of score profiles) followed by the
     * similarity threshold (a double between 0.5 and 1.0, default 0.9).
     *
     * @return the parameter set; {@code run} reads the motif source at index 0 and the
     *         threshold at index 1
     * @throws RuntimeException if one of the parameters cannot be created; the original
     *         exception is attached as cause
     */
    @Override // de.jstacs.tools.JstacsTool
    public ParameterSet getToolParameters() {
        try {
            ParameterSet[] sources = new ParameterSet[]{
                    new SimpleParameterSet(new MultilineSimpleParameter("PWMs/PFMs", "PWMs/PFMs in Jaspar format. PWMs contain nucleotide probabilities and PFMs contain nucleotide frequencies.", true)),
                    new SimpleParameterSet(new FileParameter("Data set", "A data set of aligned binding sites", "fasta", true)),
                    new SimpleParameterSet(new FileParameter("Score profiles", "Score profiles in pseudo-FastA format", "txt", true))};
            SelectionParameter motifSource = new SelectionParameter(DataType.PARAMETERSET, new String[]{"PFMs/PWMs", "Binding sites", "Score profiles"}, sources, "Motif source", "You may specify the query motifs as PWMs/PFMs/PSSMs, by a data set of aligned binding sites that are used to build a PWM, or by a score profile computed on one of the supplied de Bruijn sequences.", true);
            SimpleParameter threshold = new SimpleParameter(DataType.DOUBLE, "Similarity threshold", "The threshold on the correlation of score profiles, between 0.5 and 1.0.", true, new NumberValidator<Double>(Double.valueOf(0.5d), Double.valueOf(1.0d)), Double.valueOf(0.9d));
            return new SimpleParameterSet(motifSource, threshold);
        } catch (Exception e) {
            // Keep the cause: a bare RuntimeException hides why the parameters could not be built.
            throw new RuntimeException("Could not create the parameters of " + getToolName(), e);
        }
    }

    /**
     * Runs the search: obtains the query score profiles from the parameters, reads the
     * similarity threshold from parameter 1, searches the data base and wraps the matches
     * into a {@code ToolResult}. Progress is written to the protocol.
     *
     * @param parameterSet the tool parameters
     * @param protocol receives the progress messages
     * @param progressUpdater not used
     * @param i not used
     * @return the tool result named "Motif matches"
     * @throws Exception if reading the input or the search fails
     */
    @Override
    public ToolResult run(ParameterSet parameterSet, Protocol protocol, ProgressUpdater progressUpdater, int i) throws Exception {
        LinkedList<double[][]> queryPwms = new LinkedList<>();
        Pair<String, double[]>[] queries = getProfiles(parameterSet, protocol, queryPwms);

        Object rawThreshold = parameterSet.getParameterAt(1).getValue();
        double threshold = ((Double) rawThreshold).doubleValue();

        protocol.append("Searching for matches of " + queries.length + " motifs.\n");
        ResultSet matches = find(threshold, queries, queryPwms);
        protocol.append("Finished.\n");

        String toolName = getToolName();
        return new ToolResult("Motif matches", "", null, matches, parameterSet, toolName, new Date());
    }

    /**
     * Turns the selected motif source (parameter 0) into named query score profiles.
     *
     * <ul>
     * <li>Selection 0: PWMs/PFMs are read from Jaspar-formatted text.</li>
     * <li>Selection 1: one PFM is built from a data set of aligned binding sites.</li>
     * <li>Selection 2: score profiles are read directly. A line starting with the header
     * marker {@code XMLConstants.XML_CLOSE_TAG_END} (presumably "&gt;" - confirm) names the
     * profiles that follow; every other non-blank line holds the whitespace-separated values
     * of one profile.</li>
     * </ul>
     *
     * <p>For selections 0 and 1 a matrix with a row sum above 1.000001 gets 4.0 as last
     * constructor argument of {@code PFMWrapperTrainSM}, otherwise 0.0
     * (NOTE(review): presumably counts vs. probabilities, 4.0 being a pseudo count - confirm).
     *
     * @param parameterSet the tool parameters
     * @param protocol receives progress messages
     * @param linkedList output: receives exactly one entry per returned profile, the PWM of
     *        the query or {@code null} for profiles that were read directly
     * @return the queries as pairs of name and score profile
     * @throws IllegalArgumentException if the selected motif source is unknown
     * @throws Exception if the input cannot be parsed or a profile cannot be computed
     */
    @SuppressWarnings("unchecked") // generic arrays of Pair can only be created via raw arrays
    private Pair<String, double[]>[] getProfiles(ParameterSet parameterSet, Protocol protocol, LinkedList<double[][]> linkedList) throws Exception {
        SelectionParameter source = (SelectionParameter) parameterSet.getParameterAt(0);
        int selected = source.getSelected();

        if (selected == 2) {
            String content = ((FileParameter) ((ParameterSet) source.getValue()).getParameterAt(0)).getFileContents().getContent();
            String name = null;
            LinkedList<Pair<String, double[]>> profiles = new LinkedList<>();
            for (String rawLine : content.split("\n")) {
                // Trimming also drops the '\r' of Windows line ends.
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    // Blank lines used to fail with a NumberFormatException.
                    continue;
                }
                if (line.startsWith(XMLConstants.XML_CLOSE_TAG_END)) {
                    name = line.substring(1).trim();
                } else {
                    // "\\s+" tolerates several blanks or tabs between two values.
                    String[] tokens = line.split("\\s+");
                    double[] scores = new double[tokens.length];
                    for (int j = 0; j < scores.length; j++) {
                        scores[j] = Double.parseDouble(tokens[j]);
                    }
                    profiles.add(new Pair<>(name, scores));
                    // One placeholder per profile keeps the PWM list aligned with the queries;
                    // a single placeholder for all profiles made the caller run out of bounds.
                    linkedList.add(null);
                }
            }
            protocol.append("Loaded " + profiles.size() + " score profiles.\n");
            return profiles.toArray(new Pair[0]);
        }

        ArrayList<AbstractMap.SimpleEntry<String, double[][]>> motifs;
        if (selected == 1) {
            motifs = new ArrayList<>();
            DataSet dataSet = new DataSet(DNAAlphabetContainer.SINGLETON, new SparseStringExtractor((Reader) new StringReader(((FileParameter) ((ParameterSet) source.getValue()).getParameterAt(0)).getFileContents().getContent()), '>', "", (SequenceAnnotationParser) null));
            motifs.add(new AbstractMap.SimpleEntry<>("PFM build from data", PFMComparator.getPFM(dataSet)));
            protocol.append("Built PWM from " + dataSet.getNumberOfElements() + " sequences of length " + dataSet.getElementLength() + ".\n");
        } else if (selected == 0) {
            motifs = PFMComparator.readPFMsFromJasparFastA(new BufferedReader(new StringReader((String) ((ParameterSet) source.getValue()).getParameterAt(0).getValue())));
            protocol.append("Loaded " + motifs.size() + " PWMs/PFM from Jaspar format.\n");
        } else {
            // Fail with a clear message instead of returning null, which only surfaced later
            // as a NullPointerException in the caller.
            throw new IllegalArgumentException("Unknown motif source selection: " + selected);
        }

        Pair<String, double[]>[] queries = new Pair[motifs.size()];
        for (int m = 0; m < queries.length; m++) {
            String name = motifs.get(m).getKey();
            double[][] matrix = motifs.get(m).getValue();
            boolean rowsSumToOne = true;
            for (double[] row : matrix) {
                if (ToolBox.sum(row) > 1.000001d) {
                    rowsSumToOne = false;
                    break;
                }
            }
            PFMWrapperTrainSM model = new PFMWrapperTrainSM(DNAAlphabetContainer.SINGLETON, name, matrix, rowsSumToOne ? 0.0d : 4.0d);
            linkedList.add(model.getPWM());
            double[] profile = DeBruijnMotifComparison.getProfilesForMotif((StatisticalModel) model, n, false, false)[0];
            protocol.append("Determined score profile for " + name + ".\n");
            queries[m] = new Pair<>(name, profile);
        }
        return queries;
    }

    /**
     * Returns the display name of the tool, "DBcorrDB". {@code run} also passes this name
     * to the {@code ToolResult} it creates.
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getToolName() {
        return "DBcorrDB";
    }

    /**
     * Returns the short name of the tool, "dbcorrdb" (the tool name in lower case).
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getShortName() {
        return "dbcorrdb";
    }

    /**
     * Returns a one-line description of what the tool does.
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getDescription() {
        return "search a data base of motifs by similarity of score profiles on de Bruijn sequences";
    }

    /**
     * Returns the help text, read from the bundled resource
     * {@code projects/motifComp/FindPWMsAndClusters.txt}.
     *
     * @return the content of the resource, or the empty string if reading it raises an
     *         {@link IOException} (the stack trace is printed in that case)
     */
    @Override
    public String getHelpText() {
        String helpResource = "projects/motifComp/FindPWMsAndClusters.txt";
        String help;
        try {
            StringBuffer text = FileManager.readInputStream(FindPWMsAndClusters.class.getClassLoader().getResourceAsStream(helpResource));
            help = text.toString();
        } catch (IOException e) {
            e.printStackTrace();
            help = "";
        }
        return help;
    }

    /**
     * Returns the version of the tool, "1.0".
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getToolVersion() {
        return "1.0";
    }

    /**
     * Returns {@code null}: this tool declares no default result entries.
     */
    @Override // de.jstacs.tools.JstacsTool
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }
}
