package projects.tals;

import de.jstacs.DataType;
import de.jstacs.algorithms.optimization.termination.SmallDifferenceOfFunctionEvaluationsCondition;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DNADataSet;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.io.FileManager;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.DataSetResult;
import de.jstacs.results.ImageResult;
import de.jstacs.results.ListResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.StorableResult;
import de.jstacs.sequenceScores.statisticalModels.differentiable.AbstractDifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.DifferentiableStatisticalModelWrapperTrainSM;
import de.jstacs.tools.ui.galaxy.GalaxyAdaptor;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.PFMComparator;
import de.jstacs.utils.Pair;
import de.jstacs.utils.SeqLogoPlotter;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.batik.util.XMLConstants;
import org.biojavax.bio.seq.Position;
import projects.tals.TBSScanner;

/* loaded from: input_file:projects/tals/ScanForTBSWeb.class */
/**
 * Entry point of the TALgetter web/Galaxy tool.
 *
 * <p>{@link #main(String[])} declares the tool parameters, lets {@link GalaxyAdaptor} parse the
 * command line, obtains a TALgetter model (bundled, loaded from a file, or trained on supplied
 * data), scans the input sequences with {@link TBSScanner} and registers protocol text, tables,
 * a FastA data set and logo images as results.
 *
 * <p>NOTE(review): this source was recovered by a decompiler. The references to
 * {@code XMLConstants.XML_VERSION_11}, {@code XMLConstants.XML_CHAR_REF_SUFFIX},
 * {@code XMLConstants.XML_TAB} and {@code Position.IN_RANGE} look like constant substitutions
 * for plain string literals; they are kept unchanged here. Confirm against the original source
 * before replacing them.
 */
public class ScanForTBSWeb {

    /** Name of the selection parameter that decides where the model comes from. */
    private static final String MODEL_TRAINING = "Model training";

    /** Value of {@code getSelected()} for which the model is read from a user-supplied file. */
    private static final int USE_TRAINED_MODEL = 1;

    /** Value of {@code getSelected()} for which the model is trained on user-supplied data. */
    private static final int TRAIN_MODEL = 2;

    /**
     * Runs the complete TALgetter workflow.
     *
     * @param strArr command-line arguments, handed to {@link GalaxyAdaptor#parse}
     * @throws Exception if parameter handling, model loading, training or scanning fails
     */
    public static void main(String[] strArr) throws Exception {
        TBSScanner.TBSScannerParameterSet scannerParams = new TBSScanner.TBSScannerParameterSet();

        SelectionParameter modelType = new SelectionParameter(DataType.INT, new String[]{"TALgetter", "TALgetter13"}, new Integer[]{1, 2}, "Model type", "TALgetter is the default model that uses individual binding specificities for each RVD. TALgetter13 uses binding specificities that only depend on amino acid 13, i.e., the second amino acid of the repat.While TALgetter is recommended in most cases, the use of TALgetter13 may be beneficial if you search for target sites of TAL effector with many rare RVDs, for instance YG, HH, or S*.", true);
        modelType.setDefault("TALgetter");

        // The same "Model type" parameter instance is shared by the first and the third option.
        SelectionParameter modelSource = new SelectionParameter(
                DataType.PARAMETERSET,
                new String[]{"Use standard model", "Use previously trained model", "Train model on training data"},
                new ParameterSet[]{
                    new SimpleParameterSet(modelType),
                    new SimpleParameterSet(new FileParameter("Model", "Choose a TALgetter model from your history", "xml", true)),
                    new SimpleParameterSet(modelType, new FileParameter("Training data", "The training data, annotated FastA format. The required format is described in the help section, where we also provide the data set used to train the default models.", "fasta", true))
                },
                MODEL_TRAINING,
                "You can either use the standard TALgetter model, re-use a TALgetter model that has already been trained on given pairs of TAL effectors and target sites, or you can provide your own training data. ",
                true);
        scannerParams.addParameter(scannerParams.getNumberOfParameters(), modelSource);

        GalaxyAdaptor galaxyAdaptor = new GalaxyAdaptor(
                new SimpleParameterSet(scannerParams.getAllParameters()),
                null,
                new boolean[]{true, true, true, false, false, true, true},
                "TALgetter",
                "TALgetter (TAL effector target site finder) is a tool for predicting target sites given the RVD sequence of a TAL effector.",
                XMLConstants.XML_VERSION_11,
                "java -Xms256M -Xmx2G -jar " + System.getProperty("user.dir") + System.getProperty("file.separator") + "TALgetterWeb.jar",
                "jobname");
        galaxyAdaptor.setHelp(FileManager.readInputStream(openBundledResource("projects/tals/help.txt")).toString());
        if (!galaxyAdaptor.parse(strArr, false)) {
            System.exit(1);
        }

        // Parameter values are read only after parsing; look the selection up once instead of per use.
        SelectionParameter trainingChoice = (SelectionParameter) scannerParams.getParameterForName(MODEL_TRAINING);
        int trainingMode = trainingChoice.getSelected();
        SimpleParameterSet chosenOptions = (SimpleParameterSet) trainingChoice.getValue();

        // Unless a model file is supplied, start from one of the two bundled models; which one
        // depends on whether the "Model type" value equals 1.
        AbstractDifferentiableStatisticalModel model = null;
        if (trainingMode != USE_TRAINED_MODEL) {
            boolean firstModelType = ((SelectionParameter) chosenOptions.getParameterAt(0)).getValue().equals(1);
            model = loadBundledModel(firstModelType
                    ? "projects/tals/talfinder_obg2_hyp_bg.xml"
                    : "projects/tals/talfinder_obg2_hyp_bg_map.xml");
        }

        GalaxyAdaptor.Protocol protocol = galaxyAdaptor.getProtocol(false);
        // Return value unused in the recovered source; call kept in case it has side effects.
        protocol.getOutputStream();

        String[] rvds = {"NI", "NG", "NN", "NS", "N*", "ND", "NK", "NC", "NV", "NA", "NH", "HD", "HG", "HA", "H*", "HH", "HI", "HN", "S*", "SN", "SS", "IG", "YG", "NP", "NT", "IS"};
        AlphabetContainer rvdAlphabet = new AlphabetContainer(new DiscreteAlphabet(true, rvds));

        if (trainingMode == USE_TRAINED_MODEL) {
            String modelFile = (String) ((FileParameter) chosenOptions.getParameterAt(0)).getValue();
            model = new TALgetterDiffSM(FileManager.readFile(modelFile));
        } else if (trainingMode == TRAIN_MODEL) {
            String trainingFile = (String) chosenOptions.getParameterAt(1).getValue();
            DNADataSet trainingData = new DNADataSet(trainingFile, '>', new ReferenceSequenceAnnotationParser("seq", rvdAlphabet, ":", XMLConstants.XML_CHAR_REF_SUFFIX, "-"));

            // Every sequence weighs 1 unless it carries a "weight" annotation.
            double[] weights = new double[trainingData.getNumberOfElements()];
            Arrays.fill(weights, 1.0d);
            for (int i = 0; i < weights.length; i++) {
                SequenceAnnotation weightAnnotation = trainingData.getElementAt(i).getSequenceAnnotationByType("weight", 0);
                if (weightAnnotation != null) {
                    weights[i] = Double.parseDouble(weightAnnotation.getIdentifier());
                }
            }

            // NOTE(review): the meaning of the numeric constructor arguments (1, (byte) 20, 1E-12,
            // 1E-4) is defined by DifferentiableStatisticalModelWrapperTrainSM and is not visible here.
            DifferentiableStatisticalModelWrapperTrainSM trainer = new DifferentiableStatisticalModelWrapperTrainSM(model, 1, (byte) 20, new SmallDifferenceOfFunctionEvaluationsCondition(1.0E-12d), 1.0E-12d, 1.0E-4d);
            trainer.setOutputStream(null);
            trainer.train(trainingData, weights);
            model = (AbstractDifferentiableStatisticalModel) trainer.getFunction();

            // Plot the trained model for all RVDs, ordered for display.
            String[] orderedRvds = (String[]) rvds.clone();
            Arrays.sort(orderedRvds, new RvdLogoOrder());
            String allRvds = join(orderedRvds, "-");
            Pair<double[][], double[]> trained = ((TALgetterDiffSM) model).getSpecificitiesAndImportances(Sequence.create(rvdAlphabet, allRvds, "-"));
            int modelLogoHeight = SeqLogoPlotter.getHeight(750, trained.getFirstElement());
            galaxyAdaptor.addResult(new ImageResult("Model logo", "Logo plot representing specificities and importances learned from the training data", SeqLogoPlotter.plotTALgetterLogoToBufferedImage(modelLogoHeight, trained.getFirstElement(), trained.getSecondElement(), ("0-" + allRvds).split("-"))), false, true);

            protocol.appendHeading("Training TALgetter model");
            protocol.append("Trained on " + weights.length + " pairs of TAL effector and target site.<br /><br />");
            protocol.append("Model parameters: <br />");
            protocol.append(model.toString().replaceAll("\\n", "<br />"));
            protocol.append("<br /><br />");
            galaxyAdaptor.addResult(new StorableResult("TALgetter model", "TALgetter model", model), true, false);
        }

        TALgetterDiffSM talgetter = (TALgetterDiffSM) model;
        talgetter.fix();

        DataSet inputSequences = scannerParams.getInputSequences();
        protocol.appendHeading("Search");
        protocol.append("Searching for targets of the TAL effector with RVD sequence<br />");
        protocol.append(XMLConstants.XML_TAB + scannerParams.getTALSequence() + ".<br />using " + (model instanceof TALgetter13DiffSM ? "TALgetter13" : "TALgetter") + ".<br /><br />");
        protocol.append("Reporting at most " + scannerParams.getN() + " target sites in " + inputSequences.getNumberOfElements() + " input sequences.<br /><br />");

        // scan[0] feeds the tabular/FastA output and the summary, scan[1] the "Predictions" list.
        TBSScanner.ResultList[] scan = TBSScanner.scan(talgetter, scannerParams, inputSequences);

        galaxyAdaptor.addResult(new ListResult("Description of output columns", "The output can also be downloaded as a tab-separated file.", null,
                describeColumn("ID", "The ID of the input sequence as given in FastA header"),
                describeColumn("Position", "Position in the given sequence"),
                describeColumn("Distance to end", "The distance to the right end of the sequence"),
                describeColumn("Sequence", "The sequence of the target site"),
                describeColumn("Matches", "Categories of matches per position: M/m first position match/mismatch; | match; : weak match; x mismatch"),
                describeColumn("Score", "Score returned by the model"),
                describeColumn("Strand", "Strand of the predicted target site"),
                describeColumn("p-value", "Empirical p-value of the score"),
                describeColumn("E-value", "Empirical E-value of the score")), false, true);

        DataSet bindingSites = scan[0].getBindingSites();
        ResultSet[] tabularRows = scan[0].toArray();
        boolean hasPredictions = tabularRows.length > 0;

        if (hasPredictions) {
            galaxyAdaptor.addResult(new ListResult("Predictions - tabular", "", null, tabularRows), true, false);
        }
        galaxyAdaptor.addResult(new ListResult("Predictions", "The predictions are also available as a tab-seperated file and as a FastA-file containing all predicted target sites from your history.", null, scan[1].toArray()), false, true);
        if (hasPredictions) {
            galaxyAdaptor.addResult(new DataSetResult("Binding sites - FastA", "The top " + tabularRows.length + " binding sites", bindingSites), true, false);
        }

        Pair<double[][], double[]> theoretical = talgetter.getSpecificitiesAndImportances(Sequence.create(rvdAlphabet, scannerParams.getTALSequence(), "-"));
        int height = SeqLogoPlotter.getHeight(750, theoretical.getFirstElement());
        galaxyAdaptor.addResult(new ImageResult("Theoretical target site logo", "Logo plot representing specificities and importances for the input TAL effector according to model parameters", SeqLogoPlotter.plotTALgetterLogoToBufferedImage(height, theoretical.getFirstElement(), theoretical.getSecondElement(), ("0-" + scannerParams.getTALSequence()).split("-"))), false, true);

        if (hasPredictions) {
            // Normalise each row of the matrix obtained from the predicted sites before plotting.
            double[][] pfm = PFMComparator.getPFM(bindingSites);
            for (double[] row : pfm) {
                Normalisation.sumNormalisation(row);
            }
            galaxyAdaptor.addResult(new ImageResult("Predicted target site logo", "Sequence logo of the predicted target sites, depends on the threshold on the p-values and the maximum number of reported target sites", SeqLogoPlotter.plotLogoToBufferedImage(height, pfm)), false, true);
        }

        protocol.appendHeading("Finished...<br /><br />");
        protocol.appendHeading("Result");
        String summary = "Found " + scan[0].getNumberOfResults() + " target sites with scores between " + scan[0].getBestScore() + " and " + scan[0].getWorstScore();
        // The p-value clause is appended only if p-values are computed and the threshold is positive.
        if (scannerParams.computePValues() != TBSScanner.PVals.NONE && scannerParams.getPValue() > 0.0d) {
            summary += " with a p-Value of at most " + scannerParams.getPValue();
        }
        protocol.append(summary + Position.IN_RANGE);
        galaxyAdaptor.writeOutput();
    }

    /**
     * Opens a resource bundled on the class path of this tool.
     *
     * @param resourcePath class-path relative path of the resource
     * @return the opened stream, never {@code null}
     * @throws IllegalStateException if the resource cannot be found, so that a broken
     *         deployment is reported by name instead of failing later with a
     *         {@link NullPointerException}
     */
    private static java.io.InputStream openBundledResource(String resourcePath) {
        java.io.InputStream stream = ScanForTBSWeb.class.getClassLoader().getResourceAsStream(resourcePath);
        if (stream == null) {
            throw new IllegalStateException("Bundled resource not found on class path: " + resourcePath);
        }
        return stream;
    }

    /**
     * Reads a bundled XML file and extracts the object stored under the {@code "model"} tag.
     *
     * @param resourcePath class-path relative path of the model XML
     * @return the extracted model
     * @throws Exception if reading or parsing fails; declared broadly because the checked
     *         exceptions of the Jstacs helpers are not visible from this file
     */
    private static AbstractDifferentiableStatisticalModel loadBundledModel(String resourcePath) throws Exception {
        return (AbstractDifferentiableStatisticalModel) XMLParser.extractObjectForTags(FileManager.readInputStream(openBundledResource(resourcePath)), "model");
    }

    /**
     * Builds one row of the "Description of output columns" table.
     *
     * @param column name of the output column
     * @param description explanation shown next to the column name
     * @return a result set holding a "Column" and a "Description" entry
     * @throws Exception declared broadly because the checked exceptions of the Jstacs result
     *         constructors are not visible from this file
     */
    private static ResultSet describeColumn(String column, String description) throws Exception {
        // A genuine two-dimensional initializer; the decompiled form nested a Result[] inside a
        // Result[] initializer, which is not valid Java.
        return new ResultSet(new Result[][]{{
            new CategoricalResult("Column", "", column),
            new CategoricalResult("Description", "", description)
        }});
    }

    /**
     * Concatenates {@code parts}, placing {@code separator} between neighbouring elements.
     *
     * @param parts the strings to join
     * @param separator text inserted between two elements
     * @return the joined string, empty for an empty array
     */
    private static String join(String[] parts, String separator) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                joined.append(separator);
            }
            joined.append(parts[i]);
        }
        return joined.toString();
    }

    /**
     * Ordering of two-character RVD strings used for the model logo: primarily by the second
     * character with {@code '*'} sorted last, ties broken by the first character.
     */
    private static final class RvdLogoOrder implements Comparator<String> {
        @Override
        public int compare(String left, String right) {
            char leftSecond = left.charAt(1);
            char rightSecond = right.charAt(1);
            if (leftSecond == rightSecond) {
                return left.charAt(0) - right.charAt(0);
            }
            if (leftSecond == '*') {
                return 1;
            }
            if (rightSecond == '*') {
                return -1;
            }
            return leftSecond - rightSecond;
        }
    }
}
