/*
 * Decompiled with CFR 0.152.
 */
package projects.tals;

import de.jstacs.DataType;
import de.jstacs.algorithms.optimization.termination.SmallDifferenceOfFunctionEvaluationsCondition;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DNADataSet;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.io.FileManager;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.DataSetResult;
import de.jstacs.results.ImageResult;
import de.jstacs.results.ListResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.StorableResult;
import de.jstacs.sequenceScores.statisticalModels.differentiable.AbstractDifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.DifferentiableStatisticalModelWrapperTrainSM;
import de.jstacs.tools.ui.galaxy.GalaxyAdaptor;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.PFMComparator;
import de.jstacs.utils.Pair;
import de.jstacs.utils.SeqLogoPlotter;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Comparator;
import projects.tals.TALgetter13DiffSM;
import projects.tals.TALgetterDiffSM;
import projects.tals.TBSScanner;

public class ScanForTBSWeb {
    public static void main(String[] args) throws Exception {
        TBSScanner.TBSScannerParameterSet params = new TBSScanner.TBSScannerParameterSet();
        SelectionParameter mod2 = new SelectionParameter(DataType.INT, new String[]{"TALgetter", "TALgetter13"}, new Integer[]{1, 2}, "Model type", "TALgetter is the default model that uses individual binding specificities for each RVD. TALgetter13 uses binding specificities that only depend on amino acid 13, i.e., the second amino acid of the repat.While TALgetter is recommended in most cases, the use of TALgetter13 may be beneficial if you search for target sites of TAL effector with many rare RVDs, for instance YG, HH, or S*.", true);
        mod2.setDefault("TALgetter");
        SelectionParameter train = new SelectionParameter(DataType.PARAMETERSET, new String[]{"Use standard model", "Use previously trained model", "Train model on training data"}, new ParameterSet[]{new SimpleParameterSet(mod2), new SimpleParameterSet(new FileParameter("Model", "Choose a TALgetter model from your history", "xml", true)), new SimpleParameterSet(mod2, new FileParameter("Training data", "The training data, annotated FastA format. The required format is described in the help section, where we also provide the data set used to train the default models.", "fasta", true))}, "Model training", "You can either use the standard TALgetter model, re-use a TALgetter model that has already been trained on given pairs of TAL effectors and target sites, or you can provide your own training data. ", true);
        params.addParameter(params.getNumberOfParameters(), train);
        boolean[] blArray = new boolean[8];
        blArray[0] = true;
        blArray[1] = true;
        blArray[2] = true;
        blArray[5] = true;
        blArray[6] = true;
        boolean[] line = blArray;
        SimpleParameterSet ps = new SimpleParameterSet(params.getAllParameters());
        GalaxyAdaptor ga = new GalaxyAdaptor(ps, null, line, "TALgetter", "TALgetter (TAL effector target site finder) is a tool for predicting target sites given the RVD sequence of a TAL effector.", "1.1", "java -Xms256M -Xmx2G -jar " + System.getProperty("user.dir") + System.getProperty("file.separator") + "TALgetterWeb.jar", "jobname");
        ga.setHelp(FileManager.readInputStream(ScanForTBSWeb.class.getClassLoader().getResourceAsStream("projects/tals/help.txt")).toString());
        if (!ga.parse(args, false)) {
            System.exit(1);
        }
        AbstractDifferentiableStatisticalModel model = null;
        if (((SelectionParameter)params.getParameterForName("Model training")).getSelected() != 1) {
            mod2 = (SelectionParameter)((SimpleParameterSet)params.getParameterForName("Model training").getValue()).getParameterAt(0);
            model = mod2.getValue().equals(1) ? (AbstractDifferentiableStatisticalModel)XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSWeb.class.getClassLoader().getResourceAsStream("projects/tals/talfinder_obg2_hyp_bg.xml")), "model") : (AbstractDifferentiableStatisticalModel)XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSWeb.class.getClassLoader().getResourceAsStream("projects/tals/talfinder_obg2_hyp_bg_map.xml")), "model");
        }
        GalaxyAdaptor.Protocol prot = ga.getProtocol(false);
        ByteArrayOutputStream baos = prot.getOutputStream();
        String[] alph = new String[]{"NI", "NG", "NN", "NS", "N*", "ND", "NK", "NC", "NV", "NA", "NH", "HD", "HG", "HA", "H*", "HH", "HI", "HN", "S*", "SN", "SS", "IG", "YG", "NP", "NT", "IS"};
        AlphabetContainer alphabetsRVD = new AlphabetContainer((Alphabet)new DiscreteAlphabet(true, alph));
        if (((SelectionParameter)params.getParameterForName("Model training")).getSelected() == 1) {
            FileParameter fp = (FileParameter)((SimpleParameterSet)params.getParameterForName("Model training").getValue()).getParameterAt(0);
            String fn = (String)fp.getValue();
            StringBuffer sb = FileManager.readFile(fn);
            model = new TALgetterDiffSM(sb);
        } else if (((SelectionParameter)params.getParameterForName("Model training")).getSelected() == 2) {
            String filename = (String)((SimpleParameterSet)params.getParameterForName("Model training").getValue()).getParameterAt(1).getValue();
            DNADataSet trainDs = new DNADataSet(filename, '>', new ReferenceSequenceAnnotationParser("seq", alphabetsRVD, ":", ";", "-"));
            double[] weights = new double[trainDs.getNumberOfElements()];
            Arrays.fill(weights, 1.0);
            int i = 0;
            while (i < weights.length) {
                Sequence seq = trainDs.getElementAt(i);
                SequenceAnnotation ann = seq.getSequenceAnnotationByType("weight", 0);
                if (ann != null) {
                    weights[i] = Double.parseDouble(ann.getIdentifier());
                }
                ++i;
            }
            DifferentiableStatisticalModelWrapperTrainSM trainer = new DifferentiableStatisticalModelWrapperTrainSM(model, 1, 20, new SmallDifferenceOfFunctionEvaluationsCondition(1.0E-12), 1.0E-12, 1.0E-4);
            trainer.setOutputStream(null);
            trainer.train(trainDs, weights);
            model = (AbstractDifferentiableStatisticalModel)trainer.getFunction();
            String[] alph2 = (String[])alph.clone();
            Arrays.sort(alph2, new Comparator<String>(){

                @Override
                public int compare(String o1, String o2) {
                    int c = Character.valueOf(o1.charAt(1)).compareTo(Character.valueOf(o2.charAt(1)));
                    if (c == 0) {
                        return Character.valueOf(o1.charAt(0)).compareTo(Character.valueOf(o2.charAt(0)));
                    }
                    if (o1.charAt(1) == '*') {
                        return 1;
                    }
                    if (o2.charAt(1) == '*') {
                        return -1;
                    }
                    return c;
                }
            });
            StringBuffer modSeq = new StringBuffer();
            int i2 = 0;
            while (i2 < alph2.length) {
                modSeq.append(alph2[i2]);
                if (i2 < alph2.length - 1) {
                    modSeq.append("-");
                }
                ++i2;
            }
            Pair<double[][], double[]> specNImpAll = ((TALgetterDiffSM)model).getSpecificitiesAndImportances(Sequence.create(alphabetsRVD, modSeq.toString(), "-"));
            int height = SeqLogoPlotter.getHeight(750, specNImpAll.getFirstElement());
            BufferedImage img = SeqLogoPlotter.plotTALgetterLogoToBufferedImage(height, specNImpAll.getFirstElement(), specNImpAll.getSecondElement(), ("0-" + modSeq.toString()).split("-"));
            ga.addResult(new ImageResult("Model logo", "Logo plot representing specificities and importances learned from the training data", img), false, true);
            prot.appendHeading("Training TALgetter model");
            prot.append("Trained on " + weights.length + " pairs of TAL effector and target site.<br /><br />");
            prot.append("Model parameters: <br />");
            prot.append(model.toString().replaceAll("\\n", "<br />"));
            prot.append("<br /><br />");
            ga.addResult(new StorableResult("TALgetter model", "TALgetter model", model), true, false);
        }
        ((TALgetterDiffSM)model).fix();
        DataSet ds = params.getInputSequences();
        prot.appendHeading("Search");
        prot.append("Searching for targets of the TAL effector with RVD sequence<br />");
        prot.append("    " + params.getTALSequence() + ".<br />" + "using " + (model instanceof TALgetter13DiffSM ? "TALgetter13" : "TALgetter") + ".<br /><br />");
        prot.append("Reporting at most " + params.getN() + " target sites in " + ds.getNumberOfElements() + " input sequences.<br /><br />");
        TBSScanner.ResultList[] rls = TBSScanner.scan((TALgetterDiffSM)model, params, ds);
        ga.addResult(new ListResult("Description of output columns", "The output can also be downloaded as a tab-separated file.", null, new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "ID"), new CategoricalResult("Description", "", "The ID of the input sequence as given in FastA header")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Position"), new CategoricalResult("Description", "", "Position in the given sequence")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Distance to end"), new CategoricalResult("Description", "", "The distance to the right end of the sequence")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Sequence"), new CategoricalResult("Description", "", "The sequence of the target site")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Matches"), new CategoricalResult("Description", "", "Categories of matches per position: M/m first position match/mismatch; | match; : weak match; x mismatch")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Score"), new CategoricalResult("Description", "", "Score returned by the model")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "Strand"), new CategoricalResult("Description", "", "Strand of the predicted target site")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "p-value"), new CategoricalResult("Description", "", "Empirical p-value of the score")}}), new ResultSet(new Result[][]{{new CategoricalResult("Column", "", "E-value"), new CategoricalResult("Description", "", "Empirical E-value of the score")}})), false, true);
        DataSet bs = rls[0].getBindingSites();
        ResultSet[] res = rls[0].toArray();
        if (res.length > 0) {
            ga.addResult(new ListResult("Predictions - tabular", "", null, res), true, false);
        }
        ga.addResult(new ListResult("Predictions", "The predictions are also available as a tab-seperated file and as a FastA-file containing all predicted target sites from your history.", null, rls[1].toArray()), false, true);
        if (res.length > 0) {
            ga.addResult(new DataSetResult("Binding sites - FastA", "The top " + res.length + " binding sites", bs), true, false);
        }
        Pair<double[][], double[]> specNImp = ((TALgetterDiffSM)model).getSpecificitiesAndImportances(Sequence.create(alphabetsRVD, params.getTALSequence(), "-"));
        int height = SeqLogoPlotter.getHeight(750, specNImp.getFirstElement());
        BufferedImage img = SeqLogoPlotter.plotTALgetterLogoToBufferedImage(height, specNImp.getFirstElement(), specNImp.getSecondElement(), ("0-" + params.getTALSequence()).split("-"));
        ga.addResult(new ImageResult("Theoretical target site logo", "Logo plot representing specificities and importances for the input TAL effector according to model parameters", img), false, true);
        if (res.length > 0) {
            double[][] pfm = PFMComparator.getPFM(bs);
            int i = 0;
            while (i < pfm.length) {
                Normalisation.sumNormalisation(pfm[i]);
                ++i;
            }
            BufferedImage bsLogo = SeqLogoPlotter.plotLogoToBufferedImage(height, pfm);
            ga.addResult(new ImageResult("Predicted target site logo", "Sequence logo of the predicted target sites, depends on the threshold on the p-values and the maximum number of reported target sites", bsLogo), false, true);
        }
        prot.appendHeading("Finished...<br /><br />");
        prot.appendHeading("Result");
        prot.append("Found " + rls[0].getNumberOfResults() + " target sites with scores between " + rls[0].getBestScore() + " and " + rls[0].getWorstScore() + (params.computePValues() != TBSScanner.PVals.NONE && params.getPValue() > 0.0 ? " with a p-Value of at most " + params.getPValue() : "") + ".");
        ga.writeOutput();
    }
}

