/*
 * Decompiled with CFR 0.152.
 */
package projects.dimont;

import de.jstacs.DataType;
import de.jstacs.Storable;
import de.jstacs.algorithms.optimization.ConstantStartDistance;
import de.jstacs.algorithms.optimization.NegativeDifferentiableFunction;
import de.jstacs.algorithms.optimization.Optimizer;
import de.jstacs.algorithms.optimization.termination.CombinedCondition;
import de.jstacs.algorithms.optimization.termination.IterationCondition;
import de.jstacs.algorithms.optimization.termination.MultipleIterationsCondition;
import de.jstacs.algorithms.optimization.termination.SmallDifferenceOfFunctionEvaluationsCondition;
import de.jstacs.classifiers.differentiableSequenceScoreBased.OptimizableFunction;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.GenDisMixClassifier;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.GenDisMixClassifierParameterSet;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.LearningPrinciple;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.LogGenDisMixFunction;
import de.jstacs.classifiers.differentiableSequenceScoreBased.logPrior.CompositeLogPrior;
import de.jstacs.classifiers.differentiableSequenceScoreBased.logPrior.LogPrior;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.EmptyDataSetException;
import de.jstacs.data.WrongAlphabetException;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.ContinuousAlphabet;
import de.jstacs.data.alphabets.DNAAlphabet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.ArbitraryFloatSequence;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.SparseSequence;
import de.jstacs.data.sequences.WrongSequenceTypeException;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SplitSequenceAnnotationParser;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.motifDiscovery.MotifDiscoverer;
import de.jstacs.motifDiscovery.MutableMotifDiscoverer;
import de.jstacs.motifDiscovery.MutableMotifDiscovererToolbox;
import de.jstacs.motifDiscovery.SignificantMotifOccurrencesFinder;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.ListResult;
import de.jstacs.results.NumericalResult;
import de.jstacs.results.PlotGeneratorResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.ResultSetResult;
import de.jstacs.results.TextResult;
import de.jstacs.sequenceScores.differentiable.DifferentiableSequenceScore;
import de.jstacs.sequenceScores.statisticalModels.differentiable.DifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.differentiable.directedGraphicalModels.MarkovModelDiffSM;
import de.jstacs.sequenceScores.statisticalModels.differentiable.homogeneous.HomogeneousMMDiffSM;
import de.jstacs.sequenceScores.statisticalModels.differentiable.homogeneous.UniformHomogeneousDiffSM;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.DoubleList;
import de.jstacs.utils.PFMComparator;
import de.jstacs.utils.Pair;
import de.jstacs.utils.SafeOutputStream;
import de.jstacs.utils.SeqLogoPlotter;
import de.jstacs.utils.ToolBox;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import javax.naming.OperationNotSupportedException;
import projects.dimont.AbstractSingleMotifChIPper;
import projects.dimont.HeuristicOneDataSetLogGenDisMixFunction;
import projects.dimont.Interpolation;
import projects.dimont.ThresholdedStrandChIPper;

public class DimontTool
implements JstacsTool {
    // Constant 0.001. It is not referenced by name in the visible part of this decompiled
    // class; run() passes the literal 0.001 to SignificantMotifOccurrencesFinder, which is
    // presumably this constant after compile-time inlining -- TODO confirm against the
    // original source.
    private static final double ALPHA = 0.001;

    /**
     * Creates the parameter set of the tool.
     *
     * <p>The parameters are registered in a fixed order, and {@code run} reads them back by
     * positional index, so the order here must not change:
     * 0 input file, 1 position tag, 2 value tag, 3 standard deviation, 4 weighting factor,
     * 5 starts, 6 initial motif width, 7 Markov order of the motif model, 8 Markov order of
     * the background model, 9 equivalent sample size, 10 delete-binding-sites switch.
     *
     * @return a new {@link ToolParameterSet} named after {@link #getShortName()}
     * @throws RuntimeException if one of the parameters cannot be constructed; the original
     *         exception is attached as the cause
     */
    @Override
    public ToolParameterSet getToolParameters() {
        LinkedList<Parameter> parameters = new LinkedList<Parameter>();
        parameters.add(new FileParameter("Input file", "The file name of the file containing the input sequences in annotated FastA format (see readme)", "fasta,fa,fas", true));
        try {
            parameters.add(new SimpleParameter(DataType.STRING, "Position tag", "The tag for the position information in the FastA-annotation of the input file", true, "peak"));
            parameters.add(new SimpleParameter(DataType.STRING, "Value tag", "The tag for the value information in the FastA-annotation of the input file", true, "signal"));
            parameters.add(new SimpleParameter(DataType.DOUBLE, "Standard deviation", "The standard deviation of the position distribution centered at the position specified by the position tag", true, new NumberValidator<Double>(1.0, 10000.0), 75.0));
            parameters.add(new SimpleParameter(DataType.STRING, "Weighting factor", "The value for weighting the data; either a value between 0 and 1, or a description relative to the standard deviation (e.g. +4sd)", true, "0.2"));
            parameters.add(new SimpleParameter(DataType.INT, "Starts", "The number of pre-optimization runs.", true, new NumberValidator<Integer>(1, 100), 20));
            parameters.add(new SimpleParameter(DataType.INT, "Initial motif width", "The motif width that is used initially, may be adjusted during optimization.", true, new NumberValidator<Integer>(1, 50), 15));
            parameters.add(new SimpleParameter(DataType.INT, "Markov order of motif model", "The Markov order of the model for the motif.", true, new NumberValidator<Integer>(0, 3), 0));
            parameters.add(new SimpleParameter(DataType.INT, "Markov order of background model", "The Markov order of the model for the background sequence and the background sequence, -1 defines uniform distribution.", true, new NumberValidator<Integer>(-1, 5), -1));
            parameters.add(new SimpleParameter(DataType.DOUBLE, "Equivalent sample size", "Reflects the strength of the prior on the model parameters.", true, new NumberValidator<Double>(0.0, Double.POSITIVE_INFINITY), 4.0));
            parameters.add(new SimpleParameter(DataType.BOOLEAN, "Delete BSs from profile", "A switch for deleting binding site positions of discovered motifs from the profile before searching for futher motifs.", true, true));
        }
        catch (Exception e) {
            // Keep the original failure as the cause; a bare RuntimeException hides which
            // parameter definition was rejected and why.
            throw new RuntimeException("Could not create the parameters of " + this.getToolName(), e);
        }
        return new ToolParameterSet(this.getShortName(), parameters.toArray(new Parameter[0]));
    }

    /**
     * Runs the complete motif discovery workflow on the configured input file.
     *
     * <p>Visible steps: (1) read the annotated input sequences and the positional parameters,
     * (2) extract a raw value and a position per sequence from the annotations and turn the
     * values into foreground/background weights, (3) attach a position profile to every
     * sequence, (4) evaluate k-mer based (and, if needed, additional) initializations on a
     * reduced data set and pre-optimize the best {@code restarts} of them, (5) filter the
     * pre-optimized candidates and fully optimize each remaining one, re-estimating the
     * standard deviation in between, (6) post-filter the final motifs and package one
     * {@link ResultSetResult} per retained motif.
     *
     * @param parameters the parameter set; values are read by positional index 0..10 in the
     *        order in which {@code getToolParameters()} registers them
     * @param protocol receives textual progress messages via {@code append}
     * @param progress receives progress values between 0.0 and 1.0
     * @param threads passed on to the objective functions and the classifier parameter set
     * @return the tool result holding the per-motif result sets and the used parameters
     * @throws Exception if reading the data, model construction or optimization fails
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads) throws Exception {
        double wf;
        progress.setLast(1.0);
        progress.setCurrent(0.0);
        // Parse the content of the input file parameter as sparse DNA sequences; the '>' lines
        // are handed to a SplitSequenceAnnotationParser configured with ":" and ";".
        DataSet fgData = SparseSequence.getDataSet((AlphabetContainer)DNAAlphabetContainer.SINGLETON, new SparseStringExtractor(new StringReader(((FileParameter)parameters.getParameterAt(0)).getFileContents().getContent()), '>', "", (SequenceAnnotationParser)new SplitSequenceAnnotationParser(":", ";")));
        // Positional parameter access; indices must match the registration order in
        // getToolParameters().
        String position = parameters.getParameterAt(1).getValue().toString();
        String value = parameters.getParameterAt(2).getValue().toString();
        double sd = (Double)parameters.getParameterAt(3).getValue();
        String weightingFactor = parameters.getParameterAt(4).getValue().toString();
        int restarts = (Integer)parameters.getParameterAt(5).getValue();
        int motifLength = (Integer)parameters.getParameterAt(6).getValue();
        int fgOrder = (Integer)parameters.getParameterAt(7).getValue();
        int bgOrder = (Integer)parameters.getParameterAt(8).getValue();
        double ess = (Double)parameters.getParameterAt(9).getValue();
        boolean delete = (Boolean)parameters.getParameterAt(10).getValue();
        // Hard-coded thresholds handed to filter2 (after pre-optimization) and postFilter
        // (after the final optimization).
        double filterThreshold = 0.3;
        double filterThresholdEnd = 0.3;
        DNAAlphabetContainer con = DNAAlphabetContainer.SINGLETON;
        boolean free = false;
        DataSet[] data = new DataSet[]{fgData};
        Sequence[] annotated = new Sequence[data[0].getNumberOfElements()];
        // NOTE(review): decompiler artifact -- `weights` is declared as Object but is indexed
        // and cast like a double[2][n] throughout this method.
        Object weights = new double[2][data[0].getNumberOfElements()];
        double[] raw = (double[])weights[0].clone();
        double[] mean = new double[weights[0].length];
        Arrays.fill(mean, Double.NaN);
        // Per sequence: the annotation whose type equals the value tag yields the raw value,
        // the one whose type equals the position tag yields the profile centre. A sequence
        // without a position annotation keeps NaN in `mean`.
        int j = 0;
        while (j < weights[0].length) {
            annotated[j] = data[0].getElementAt(j);
            SequenceAnnotation[] seqAn = annotated[j].getAnnotation();
            mean[j] = Double.NaN;
            int i = 0;
            while (i < seqAn.length) {
                if (seqAn[i].getType().equals(value)) {
                    raw[j] = Double.parseDouble(seqAn[i].getIdentifier());
                } else if (seqAn[i].getType().equals(position)) {
                    mean[j] = Double.parseDouble(seqAn[i].getIdentifier());
                }
                ++i;
            }
            ++j;
        }
        // Weighting factor: either a plain number, or "<h>sd", in which case it becomes the
        // fraction of sequences whose raw value is at least mean + h * sd of all raw values.
        if (weightingFactor.endsWith("sd")) {
            double h = Double.parseDouble(weightingFactor.substring(0, weightingFactor.length() - 2));
            double meanRaw = ToolBox.sum(raw) / (double)raw.length;
            double sdRaw = 0.0;
            int i = 0;
            while (i < raw.length) {
                sdRaw += (raw[i] - meanRaw) * (raw[i] - meanRaw);
                ++i;
            }
            sdRaw = Math.sqrt(sdRaw / (double)raw.length);
            h = meanRaw + h * sdRaw;
            double anz = 0.0;
            int i2 = 0;
            while (i2 < raw.length) {
                if (raw[i2] >= h) {
                    anz += 1.0;
                }
                ++i2;
            }
            // At least 50 sequences are counted.
            // NOTE(review): with fewer than 50 input sequences this makes wf larger than 1 --
            // confirm that this is intended.
            anz = Math.max(50.0, anz);
            wf = anz / (double)raw.length;
        } else {
            wf = Double.parseDouble(weightingFactor);
        }
        // Foreground weights from the raw values (Interpolation.RANK_LOG), background weights
        // derived from the foreground weights.
        weights[0] = Interpolation.getWeight(data[0], raw, wf, Interpolation.RANK_LOG);
        weights[1] = Interpolation.getBgWeight(weights[0]);
        // Mask of profile positions that may still be used; initially every position.
        boolean[][] allowed = new boolean[annotated.length][];
        int i = 0;
        while (i < annotated.length) {
            allowed[i] = new boolean[annotated[i].getLength()];
            Arrays.fill(allowed[i], true);
            ++i;
        }
        // Attach the position profile to every sequence (this also replaces the entries of
        // `annotated`).
        data[0] = DimontTool.annotate(annotated, weights, mean, sd, allowed);
        DataSet completeData = data[0];
        double[][] completeWeight = new double[][]{weights[0], weights[1]};
        // Foreground: motif model wrapped in a ThresholdedStrandChIPper; background: model
        // chosen by getBgSF, with the equivalent sample size scaled by (1 - wf) / wf.
        MarkovModelDiffSM motif = new MarkovModelDiffSM((AlphabetContainer)con, motifLength, ess, true, fgOrder, null);
        ThresholdedStrandChIPper fg = new ThresholdedStrandChIPper(1, 0.5, motif);
        fg.initializeFunctionRandomly(false);
        double fac = (1.0 - wf) / wf;
        DifferentiableStatisticalModel bg = DimontTool.getBgSF(con, bgOrder, ess * fac, data[0].getAverageElementLength());
        bg.initializeFunction(0, false, data, (double[][])weights);
        DifferentiableSequenceScore[] score = new DifferentiableStatisticalModel[]{fg, bg};
        // The main objective uses the beta of LearningPrinciple.MSP; the initialization
        // objective uses MCL if beta[0] > 0 and ML otherwise.
        double[] beta = LearningPrinciple.getBeta(LearningPrinciple.MSP);
        LearningPrinciple initKey = beta[0] > 0.0 ? LearningPrinciple.MCL : LearningPrinciple.ML;
        HeuristicOneDataSetLogGenDisMixFunction initObjective = new HeuristicOneDataSetLogGenDisMixFunction(threads, score, completeData, (double[][])completeWeight.clone(), (LogPrior)new CompositeLogPrior(), LearningPrinciple.getBeta(initKey), true, free);
        HeuristicOneDataSetLogGenDisMixFunction objective = new HeuristicOneDataSetLogGenDisMixFunction(threads, score, data[0], (double[][])weights, (LogPrior)new CompositeLogPrior(), beta, true, free);
        // The optimizer is called with the negated objective.
        NegativeDifferentiableFunction neg = new NegativeDifferentiableFunction(objective);
        double eps = 1.0E-4;
        CombinedCondition stop = new CombinedCondition(2, new MultipleIterationsCondition(5, new SmallDifferenceOfFunctionEvaluationsCondition(eps)), new IterationCondition(100));
        ConstantStartDistance start = new ConstantStartDistance(1.0);
        Object params = null;
        double[][] pwm = null;
        double[] entropy = new double[motifLength];
        double[] kl = new double[motifLength];
        // Numeric algorithm code handed to Optimizer.optimize and the classifier parameter
        // set; which algorithm 18 denotes is not visible here.
        byte algo = 18;
        GenDisMixClassifierParameterSet genDisMixParams = new GenDisMixClassifierParameterSet(con, 0, algo, eps, eps * 0.1, 1.0, free, OptimizableFunction.KindOfParameter.PLUGIN, true, threads);
        objective.reset(score);
        DataSet smallData = null;
        Object smallWeight = new double[2][];
        double[] p = null;
        // Reduced data set (percent 0.3, at most 1000 sequences) used for initialization and
        // pre-optimization.
        Pair<DataSet, double[][]> small = DimontTool.getSmallDataSets(completeWeight, annotated, 0.3, 1000);
        smallData = small.getFirstElement();
        smallWeight = small.getSecondElement();
        initObjective.setDataAndWeights(new DataSet[]{smallData}, (double[][])smallWeight);
        initObjective.reset(score);
        // percentKmers is fixed to 1.0, so the k-mer branch below is always entered.
        double percentKmers = 1.0;
        Object[] sortedPars = new ComparableElement[restarts];
        int numKmers = 0;
        if (percentKmers > 0.0) {
            int k = 7;
            // Request statistics for at least 50 k-mers (k = 7) on the reduced data set.
            ComparableElement<String, Double>[] array = DimontTool.getKmereSequenceStatistic(Math.max(50, (int)Math.ceil(percentKmers * (double)restarts)), k, smallData, smallWeight[0]);
            Object[] array2 = new ComparableElement[array.length];
            int a = 4;
            double d = 0.1 / (double)(a - 1);
            d = (1.0 - (double)a * d) / ((double)a * d);
            double h = d * motif.getESS();
            // Initialize the motif from each k-mer (weighted with h) and record the value of
            // the initialization objective for the resulting parameters.
            int z = 0;
            while (z < array.length) {
                ((AbstractSingleMotifChIPper)score[0]).initializeMotif(0, new DataSet("", Sequence.create(con, array[z].getElement())), new double[]{h});
                p = objective.getParameters(OptimizableFunction.KindOfParameter.PLUGIN);
                initObjective.reset(score);
                initObjective.resetHeuristics();
                double val = initObjective.evaluateFunction(p);
                array2[z] = new ComparableElement<double[], Double>(p, val);
                ++z;
            }
            Arrays.sort(array2);
            numKmers = Math.min(array2.length, (int)Math.ceil((double)restarts * percentKmers));
            // Copy the numKmers entries from the end of the sorted array.
            z = 0;
            while (z < numKmers) {
                sortedPars[z] = array2[array2.length - 1 - z];
                ++z;
            }
        }
        // Only reached when fewer than `restarts` k-mer initializations were obtained: fill
        // the remaining slots from MutableMotifDiscovererToolbox and re-sort.
        if (numKmers != sortedPars.length) {
            MutableMotifDiscovererToolbox.InitMethodForDiffSM[] initMeth = new MutableMotifDiscovererToolbox.InitMethodForDiffSM[]{MutableMotifDiscovererToolbox.InitMethodForDiffSM.PLUG_IN, MutableMotifDiscovererToolbox.InitMethodForDiffSM.NOTHING};
            ComparableElement<double[], Double>[] sortedPars2 = MutableMotifDiscovererToolbox.getSortedInitialParameters(score, initMeth, initObjective, Math.max(100, restarts), SafeOutputStream.getSafeOutputStream(null), 0);
            int z = 0;
            while (z < sortedPars.length - numKmers) {
                sortedPars[numKmers + z] = sortedPars2[sortedPars2.length - 1 - z];
                ++z;
            }
            Arrays.sort(sortedPars);
        }
        // NOTE(review): stop2 (at most 25 iterations) is not referenced in the visible rest of
        // this method; the pre-optimization below passes `stop` (at most 100 iterations).
        // Confirm against the original source which one is intended.
        CombinedCondition stop2 = new CombinedCondition(2, new MultipleIterationsCondition(5, new SmallDifferenceOfFunctionEvaluationsCondition(eps)), new IterationCondition(25));
        Object[] preOpt = new ComparableElement[restarts];
        // Pre-optimization: optimize each start on the reduced data, then score the result on
        // the complete data. Progress runs from 0 to 0.5.
        int r = 0;
        while (r < restarts) {
            progress.setCurrent(0.5 / (double)restarts * (double)r);
            data[0] = smallData;
            weights = smallWeight;
            objective.setDataAndWeights(data, (double[][])weights);
            objective.resetHeuristics();
            protocol.append("-----------------------------------------\npre-optimization " + r + "\n");
            start.reset();
            // Starts are taken from the end of sortedPars.
            p = (double[])((ComparableElement)sortedPars[sortedPars.length - 1 - r]).getElement();
            objective.setParams(p);
            Optimizer.optimize(algo, neg, p, stop, eps * 0.1, start, null);
            data[0] = completeData;
            weights = completeWeight;
            objective.setDataAndWeights(data, (double[][])weights);
            preOpt[r] = new ComparableElement<double[], Double>(p, objective.evaluateFunction(p));
            protocol.append("consensus: " + DimontTool.getConsensus(DNAAlphabetContainer.SINGLETON, ((MarkovModelDiffSM)((AbstractSingleMotifChIPper)score[0]).getFunction(0)).getPWM()) + "\n");
            protocol.append("score: " + ((ComparableElement)preOpt[r]).getWeight() + "\n");
            ((AbstractSingleMotifChIPper)score[0]).resetPositions();
            ++r;
        }
        protocol.append("-----------------------------------------\n");
        progress.setCurrent(0.5);
        Arrays.sort(preOpt);
        // Reduce the pre-optimized candidates with filter2; every remaining entry gets a full
        // optimization below.
        ArrayList<ComparableElement<double[], Double>> list = DimontTool.filter2((AbstractSingleMotifChIPper)score[0], preOpt, smallData, filterThreshold, motifLength, protocol);
        MutableMotifDiscoverer[] best = new MutableMotifDiscoverer[list.size()];
        Storable[] storables = new Storable[best.length];
        double[] opts = new double[best.length];
        Pair[] pairs = new Pair[best.length];
        int m = 0;
        while (m < best.length) {
            Pair<double[][][], int[][]> pair;
            progress.setCurrent(0.5 + (double)m * 1.0 / ((double)best.length + 1.0));
            protocol.append("+++++++++++++++++++++++++++++++++++++++++++++++++++\n\nfinalOpt " + m + " -----------------------------------------\n");
            best[m] = (MutableMotifDiscoverer)((Object)score[0]);
            // Bring every motif of the shared foreground model back to the configured length
            // (a previous iteration may have changed it).
            int j2 = 0;
            while (j2 < best[m].getNumberOfMotifs()) {
                if (best[m].getMotifLength(j2) != motifLength) {
                    best[m].modifyMotif(j2, 0, motifLength - best[m].getMotifLength(j2));
                }
                ++j2;
            }
            ((AbstractSingleMotifChIPper)score[0]).reset();
            ((AbstractSingleMotifChIPper)score[0]).resetPositions();
            objective.reset(score);
            objective.resetHeuristics();
            start.reset();
            p = list.get(m).getElement();
            objective.setParams(p);
            // Re-annotate with the current `allowed` mask and the configured sd, then run the
            // first optimization.
            data[0] = DimontTool.annotate(annotated, weights, mean, sd, allowed);
            objective.setDataAndWeights(data, (double[][])weights);
            Optimizer.optimize(algo, neg, p, stop, eps * 0.1, start, SafeOutputStream.getSafeOutputStream(null));
            // sds is handed to heuristic as an output array; presumably heuristic stores its
            // sd estimate in sds[0] -- its body is not visible here.
            double[] sds = new double[1];
            DimontTool.heuristic((MutableMotifDiscoverer)((Object)score[0]), completeData, completeWeight, objective, mean, sds, protocol);
            ((AbstractSingleMotifChIPper)score[0]).reset();
            // New sd is the geometric mean of sds[0] and the configured sd; fall back to the
            // configured sd if the result is NaN, infinite or not positive.
            double newSd = Math.sqrt(sds[0] * sd);
            if (Double.isNaN(newSd) || Double.isInfinite(newSd) || newSd <= 0.0) {
                protocol.append("Did not adjust sd to " + newSd + " using " + sds[0] + " and " + sd + "\n");
                newSd = sd;
            }
            // Re-annotate with the adjusted sd and optimize again, starting from the last
            // parameters.
            data[0] = DimontTool.annotate(annotated, weights, mean, newSd, allowed);
            objective.setDataAndWeights(data, (double[][])weights);
            ((AbstractSingleMotifChIPper)score[0]).resetPositions();
            objective.reset(score);
            p = objective.getParameters(OptimizableFunction.KindOfParameter.LAST);
            objective.setParams(p);
            Optimizer.optimize(algo, neg, p, stop, eps * 0.1, start, SafeOutputStream.getSafeOutputStream(null));
            protocol.append("consensus: " + DimontTool.getConsensus(DNAAlphabetContainer.SINGLETON, ((MarkovModelDiffSM)((AbstractSingleMotifChIPper)score[0]).getFunction(0)).getPWM()) + "\n");
            // Keep a clone, because score[0] is reused by the next iteration.
            best[m] = (MutableMotifDiscoverer)((Object)score[0]).clone();
            opts[m] = objective.evaluateFunction(p);
            GenDisMixClassifier cl = new GenDisMixClassifier(genDisMixParams, (LogPrior)new CompositeLogPrior(), opts[m], LearningPrinciple.getBeta(LearningPrinciple.MSP), (DifferentiableStatisticalModel[])score);
            cl.setClassWeights(false, objective.getClassParams(p));
            storables[m] = cl;
            // Predict motif occurrences on the complete data (literal 0.001 passed as the last
            // constructor argument).
            SignificantMotifOccurrencesFinder smof = new SignificantMotifOccurrencesFinder(best[m], completeData, completeWeight[1], 0.001);
            pairs[m] = pair = smof.getPWMAndPositions(0, completeData, completeWeight[0], 0, 0);
            // Optionally mask the predicted positions so that later candidates cannot use
            // them; skipped for the last candidate.
            if (delete && m + 1 < best.length) {
                DimontTool.delete(pair.getSecondElement(), allowed, motifLength);
            }
            ++m;
        }
        // index[rank] = candidate position, i.e. the inverse of the rank permutation of the
        // final objective values.
        int[] o = ToolBox.rank(opts, ToolBox.TiedRanks.IN_ORDER);
        int[] index = new int[o.length];
        int i3 = 0;
        while (i3 < o.length) {
            index[o[i3]] = i3;
            ++i3;
        }
        boolean[] use = DimontTool.postFilter(best, index, smallData, filterThresholdEnd, motifLength);
        LinkedList<ResultSetResult> results = new LinkedList<ResultSetResult>();
        // m2 walks the ranked candidates, n numbers the motifs that are actually reported.
        int m2 = 0;
        int n = 0;
        while (m2 < best.length) {
            if (use[m2]) {
                LinkedList<Result> result = new LinkedList<Result>();
                // Classifier as XML plus the table of predicted binding sites.
                result.add(new TextResult("Dimont " + (n + 1), "The Dimont classifier", new FileParameter.FileRepresentation("", storables[index[m2]].toXML().toString()), "xml", "Dimont", null, true));
                result.add(DimontTool.getTextResult(fgData, completeWeight[0], pairs[index[m2]], ((ThresholdedStrandChIPper)((GenDisMixClassifier)storables[index[m2]]).getDifferentiableSequenceScore(0)).getMotifLength(0), n));
                pwm = ((double[][][])pairs[index[m2]].getFirstElement())[0];
                // Sequence logos are only added if the PWM is not NaN.
                if (!Double.isNaN(pwm[0][0])) {
                    try {
                        result.add(new PlotGeneratorResult("Sequence logo " + (n + 1), "Sequence logo of motif " + (n + 1), new SeqLogoPlotter.SeqLogoPlotGenerator(pwm, 1000), true));
                        result.add(new PlotGeneratorResult("Sequence logo " + (n + 1) + " (rc)", "Sequence logo of the reverse complement of motif " + (n + 1), new SeqLogoPlotter.SeqLogoPlotGenerator(PFMComparator.getReverseComplement(DNAAlphabet.SINGLETON, pwm), 1000), true));
                    }
                    // NOTE(review): failures while creating the logos (Exception and
                    // InternalError) are swallowed silently; the logos are then simply
                    // missing from the result.
                    catch (Exception exception) {
                    }
                    catch (InternalError internalError) {
                        // empty catch block
                    }
                }
                // The plain-text model PWM is only exported for a motif model of order 0.
                if (fgOrder == 0) {
                    double[][] modelPwm = ((MarkovModelDiffSM)((ThresholdedStrandChIPper)((GenDisMixClassifier)storables[index[m2]]).getDifferentiableSequenceScore(0)).getMotifModel()).getPWM();
                    StringBuffer sb = new StringBuffer();
                    sb.append(">Motif" + (n + 1) + "\n");
                    // One line per row of modelPwm, entries separated by tabs.
                    int i4 = 0;
                    while (i4 < modelPwm.length) {
                        int j3 = 0;
                        while (j3 < modelPwm[i4].length) {
                            if (j3 > 0) {
                                sb.append("\t");
                            }
                            sb.append(modelPwm[i4][j3]);
                            ++j3;
                        }
                        sb.append("\n");
                        ++i4;
                    }
                    TextResult trPwm = new TextResult("Model PWM", "The model PWM in HOCOMOCO format", new FileParameter.FileRepresentation("", sb.toString()), "pwm", this.getToolName(), null, true);
                    result.add(trPwm);
                }
                ResultSetResult rsr = new ResultSetResult("Motif " + (n + 1), "The Dimont results for motif " + (n + 1), null, new ResultSet(result));
                results.add(rsr);
                ++n;
            }
            ++m2;
        }
        // Stop the worker threads of both objective functions before returning.
        initObjective.stopThreads();
        objective.stopThreads();
        progress.setCurrent(1.0);
        return new ToolResult("Result of " + this.getToolName(), this.getToolName(), null, new ResultSet(results), parameters, this.getToolName(), new Date(System.currentTimeMillis()));
    }

    /**
     * Returns the name of the tool.
     *
     * @return the constant {@code "Dimont"}; within this class it is used for the title,
     *         tool name and source of the {@code ToolResult} and as producer of the PWM result
     */
    @Override
    public String getToolName() {
        return "Dimont";
    }

    /**
     * Returns the version of the tool.
     *
     * @return the constant version string {@code "1.2"}
     */
    @Override
    public String getToolVersion() {
        return "1.2";
    }

    /**
     * Returns the short name of the tool.
     *
     * @return the constant {@code "dimont"}; {@code getToolParameters()} uses it as the name
     *         of the created {@code ToolParameterSet}
     */
    @Override
    public String getShortName() {
        return "dimont";
    }

    /**
     * Returns a one-line description of the tool.
     *
     * @return the constant description text
     */
    @Override
    public String getDescription() {
        return "a universal tool for de-novo motif discovery";
    }

    /**
     * Returns the help text of the tool, read from the class path resource
     * {@code projects/dimont/help.txt}.
     *
     * <p>The resource stream is closed after reading. If the resource is not on the class
     * path, or reading it fails, an empty string is returned instead of failing.
     *
     * @return the content of the help resource, or {@code ""} if it is missing or unreadable
     */
    @Override
    public String getHelpText() {
        // Fully qualified because java.io.InputStream is not among the imports of this file.
        try (java.io.InputStream in = DimontTool.class.getClassLoader().getResourceAsStream("projects/dimont/help.txt")) {
            if (in == null) {
                // getResourceAsStream reports a missing resource with null, not an exception;
                // treat it like an unreadable resource rather than passing null on.
                return "";
            }
            return FileManager.readInputStream(in).toString();
        }
        catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Describes the default results of the tool.
     *
     * @return two entries, both of type {@code TextResult}: the "xml" result named
     *         "Dimont 1" and the "tsv" result named "Predictions for motif 1"
     */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        JstacsTool.ResultEntry classifierXml = new JstacsTool.ResultEntry(TextResult.class, "xml", "Dimont 1");
        JstacsTool.ResultEntry predictionsTsv = new JstacsTool.ResultEntry(TextResult.class, "tsv", "Predictions for motif 1");
        return new JstacsTool.ResultEntry[]{classifierXml, predictionsTsv};
    }

    /**
     * Renders the predicted binding sites of one motif as a tab-separated table.
     *
     * <p>One line is written per entry of the position array. A negative position encodes a
     * hit on the reverse strand and is decoded as {@code -pos - 1}. Sequence index and
     * position are written 1-based.
     *
     * @param data the sequences the positions refer to
     * @param weights not used by this method
     * @param pair first element: index 1 holds the p-values per sequence and hit;
     *        second element: the (strand-encoded) start positions per sequence
     * @param motifLength the length of the extracted binding sites
     * @param motifIndex zero-based motif index; the result is named with {@code motifIndex + 1}
     * @return a "tsv" {@link TextResult} named "Predictions for motif N"
     * @throws OperationNotSupportedException if the reverse complement of a binding site
     *         cannot be computed
     */
    public static TextResult getTextResult(DataSet data, double[] weights, Pair<double[][][], int[][]> pair, int motifLength, int motifIndex) throws OperationNotSupportedException {
        SplitSequenceAnnotationParser annotationParser = new SplitSequenceAnnotationParser(":", ";");
        StringBuilder table = new StringBuilder();
        table.append("Sequence index\tPosition\tStrand\tp-value\tBinding site\tAdjusted binding site\tSequence annotation\n");
        int[][] positions = pair.getSecondElement();
        double[][] pValues = pair.getFirstElement()[1];
        for (int seqIdx = 0; seqIdx < positions.length; seqIdx++) {
            for (int hit = 0; hit < positions[seqIdx].length; hit++) {
                int encoded = positions[seqIdx][hit];
                boolean reverse = encoded < 0;
                int start = reverse ? -encoded - 1 : encoded;
                // Site as it appears in the sequence, and in the predicted orientation.
                Sequence site = data.getElementAt(seqIdx).getSubSequence(start, motifLength);
                Sequence oriented = reverse ? site.reverseComplement() : site;
                table.append(seqIdx + 1).append("\t");
                table.append(start + 1).append("\t");
                table.append(reverse ? "-" : "+").append("\t");
                table.append(pValues[seqIdx][hit]).append("\t");
                table.append(site.toString()).append("\t");
                table.append(oriented.toString()).append("\t");
                // The first character of the generated comment line is cut off.
                table.append(annotationParser.parseAnnotationToComment(' ', data.getElementAt(seqIdx).getAnnotation()).substring(1));
                table.append("\n");
            }
        }
        FileParameter.FileRepresentation content = new FileParameter.FileRepresentation("", table.toString());
        return new TextResult("Predictions for motif " + (motifIndex + 1), "", content, "tsv", "Dimont", null, true);
    }

    /**
     * Collects the predicted binding sites of one motif as a list of result sets.
     *
     * <p>One {@link ResultSet} is created per entry of the position array. A negative
     * position encodes a hit on the reverse strand and is decoded as {@code -pos - 1}.
     * Sequence index and position are reported 1-based.
     *
     * @param data the sequences the positions refer to
     * @param weights not used by this method
     * @param pair first element: index 1 holds the p-values per sequence and hit;
     *        second element: the (strand-encoded) start positions per sequence
     * @param motifLength the length of the extracted binding sites
     * @param motifIndex zero-based motif index; the result is named with {@code motifIndex + 1}
     * @return an exportable {@link ListResult} named "Predictions for motif N"
     * @throws Exception if a binding site or one of the results cannot be created
     */
    public static ListResult getListResult(DataSet data, double[] weights, Pair<double[][][], int[][]> pair, int motifLength, int motifIndex) throws Exception {
        SplitSequenceAnnotationParser annotationParser = new SplitSequenceAnnotationParser(":", ";");
        ArrayList<ResultSet> rows = new ArrayList<ResultSet>();
        int[][] positions = pair.getSecondElement();
        double[][] pValues = pair.getFirstElement()[1];
        for (int seqIdx = 0; seqIdx < positions.length; seqIdx++) {
            for (int hit = 0; hit < positions[seqIdx].length; hit++) {
                int encoded = positions[seqIdx][hit];
                boolean reverse = encoded < 0;
                int start = reverse ? -encoded - 1 : encoded;
                // Site as it appears in the sequence, and in the predicted orientation.
                Sequence site = data.getElementAt(seqIdx).getSubSequence(start, motifLength);
                Sequence oriented = reverse ? site.reverseComplement() : site;
                Result seqIndexCol = new NumericalResult("Sequence index", "The index of the sequence", seqIdx + 1);
                Result positionCol = new NumericalResult("Position", "The starting position of the motif within the sequence", start + 1);
                Result strandCol = new CategoricalResult("Strand", "The strand of the predicted BS", reverse ? "-" : "+");
                Result pValueCol = new NumericalResult("p-value", "The p-value of the predicted BS", pValues[seqIdx][hit]);
                Result siteCol = new CategoricalResult("Binding site", "The binding site as in the sequence", site.toString());
                Result orientedCol = new CategoricalResult("Adjusted binding site", "The binding site in predicted orientation", oriented.toString());
                // The first character of the generated comment line is cut off.
                Result annotationCol = new CategoricalResult("Sequence annotation", "The annotation of the original sequence", annotationParser.parseAnnotationToComment(' ', data.getElementAt(seqIdx).getAnnotation()).substring(1));
                rows.add(new ResultSet(new Result[][]{{seqIndexCol, positionCol, strandCol, pValueCol, siteCol, orientedCol, annotationCol}}));
            }
        }
        ListResult predictions = new ListResult("Predictions for motif " + (motifIndex + 1), "", null, rows.toArray(new ResultSet[0]));
        predictions.setExport(true);
        return predictions;
    }

    /**
     * Marks a window around every given position as not allowed.
     *
     * <p>For each position {@code p} (negative values are decoded as {@code -p - 1}) the
     * entries {@code [p - length / 2, p + length / 2)} of the corresponding row of
     * {@code allowed} are set to {@code false}; the window is clipped to the row bounds.
     *
     * @param positions per sequence, the (possibly strand-encoded) positions
     * @param allowed per sequence, the mask that is modified in place
     * @param length the window length; half of it (integer division) is used on each side
     */
    private static void delete(int[][] positions, boolean[][] allowed, int length) {
        final int half = length / 2;
        for (int seq = 0; seq < positions.length; seq++) {
            for (int encoded : positions[seq]) {
                int centre = encoded < 0 ? -encoded - 1 : encoded;
                boolean[] mask = allowed[seq];
                int from = Math.max(0, centre - half);
                int to = Math.min(mask.length, centre + half);
                Arrays.fill(mask, from, to, false);
            }
        }
    }

    /**
     * Selects a reduced data set from the sequences with the largest weights.
     *
     * <p>Sequences are visited from the end of the foreground weight order while fewer than
     * {@code maxN} sequences are selected and the accumulated foreground weight stays below
     * {@code percent} of the total foreground weight. After each foreground step, sequences
     * are visited from the end of the background weight order until the accumulated
     * background fraction reaches the current foreground fraction. A sequence is added at
     * most once; for every added sequence its foreground weight is recorded.
     *
     * @param completeWeight index 0: foreground weights, index 1: background weights
     * @param data the sequences, in the same order as the weights
     * @param percent the fraction of the total foreground weight to cover
     * @param maxN the maximal number of selected sequences
     * @return the selected sequences together with their foreground weights and the
     *         background weights derived from them via {@code Interpolation.getBgWeight}
     * @throws EmptyDataSetException if the data set cannot be created from the selection
     * @throws WrongAlphabetException if the data set cannot be created from the selection
     */
    private static Pair<DataSet, double[][]> getSmallDataSets(double[][] completeWeight, Sequence[] data, double percent, int maxN) throws EmptyDataSetException, WrongAlphabetException {
        double[] fgWeights = completeWeight[0];
        double[] bgWeights = completeWeight[1];
        int[] fgOrder = ToolBox.order(fgWeights, false);
        int[] bgOrder = ToolBox.order(bgWeights, false);
        boolean[] taken = new boolean[fgWeights.length];
        double fgTotal = ToolBox.sum(fgWeights);
        double bgTotal = ToolBox.sum(bgWeights);
        double fgAccum = 0.0;
        double bgAccum = 0.0;
        // Cursors point one past the next candidate; both orders are consumed from the end.
        int fgCursor = fgOrder.length;
        int bgCursor = bgOrder.length;
        int selected = 0;
        LinkedList<Sequence> chosen = new LinkedList<Sequence>();
        DoubleList chosenFgWeights = new DoubleList();
        while (selected < maxN && fgAccum + fgWeights[fgOrder[fgCursor - 1]] < fgTotal * percent) {
            int fgIdx = fgOrder[--fgCursor];
            fgAccum = fgAccum + fgWeights[fgIdx];
            if (!taken[fgIdx]) {
                chosen.add(data[fgIdx]);
                chosenFgWeights.add(fgWeights[fgIdx]);
                taken[fgIdx] = true;
                ++selected;
            }
            // Let the background catch up to the fraction covered by the foreground.
            double fgFraction = fgAccum / fgTotal;
            while (selected < maxN && bgAccum + bgWeights[bgOrder[bgCursor - 1]] < bgTotal * fgFraction) {
                int bgIdx = bgOrder[--bgCursor];
                bgAccum = bgAccum + bgWeights[bgIdx];
                if (!taken[bgIdx]) {
                    chosen.add(data[bgIdx]);
                    // The foreground weight is recorded here as well; the background
                    // weights are derived from the recorded values below.
                    chosenFgWeights.add(fgWeights[bgIdx]);
                    taken[bgIdx] = true;
                    ++selected;
                }
            }
        }
        double[] reducedFg = chosenFgWeights.toArray();
        DataSet reducedData = new DataSet("", chosen.toArray(new Sequence[0]));
        return new Pair<DataSet, double[][]>(reducedData, new double[][]{reducedFg, Interpolation.getBgWeight(reducedFg)});
    }

    /**
     * Attaches a position profile named "reads" to every sequence.
     *
     * <p>For sequence {@code s} the foreground profile is {@code exp(-0.5 * ((i - mean[s]) / sd)^2)}
     * at every allowed position {@code i} (0 elsewhere), divided by its sum. The background
     * profile is {@code max - foreground} at every allowed position (with {@code max} the
     * largest normalized foreground value), again divided by its sum. The stored profile is
     * {@code weights[0][s] * foreground + weights[1][s] * background}. All profile
     * arithmetic is done in {@code float}.
     *
     * @param annotated the sequences; each entry is replaced in place by its annotated copy
     * @param weights index 0: foreground weights, index 1: background weights; the number of
     *        processed sequences is {@code weights[0].length}
     * @param mean per sequence, the centre of the profile
     * @param sd the standard deviation of the profile
     * @param allowed per sequence, the positions that may receive profile mass
     * @return a new data set over the (modified) {@code annotated} array
     * @throws WrongAlphabetException if a profile sequence or the data set cannot be created
     * @throws WrongSequenceTypeException if a profile sequence cannot be created
     * @throws EmptyDataSetException if the data set cannot be created
     */
    private static DataSet annotate(Sequence[] annotated, double[][] weights, double[] mean, double sd, boolean[][] allowed) throws WrongAlphabetException, WrongSequenceTypeException, EmptyDataSetException {
        AlphabetContainer ref = new AlphabetContainer((Alphabet)new ContinuousAlphabet());
        float[][] histogram = new float[annotated.length][];
        for (int seq = 0; seq < weights[0].length; seq++) {
            float[] profile = new float[annotated[seq].getLength()];
            histogram[seq] = profile;

            // Unnormalized Gaussian-shaped foreground profile on the allowed positions.
            float fgTotal = 0.0f;
            for (int pos = 0; pos < profile.length; pos++) {
                if (allowed[seq][pos]) {
                    float z = (float)(((double)pos - mean[seq]) / sd);
                    profile[pos] = (float)Math.exp(-0.5 * (double)z * (double)z);
                    fgTotal += profile[pos];
                }
            }

            // Normalize and remember the largest normalized value.
            float peak = 0.0f;
            for (int pos = 0; pos < profile.length; pos++) {
                profile[pos] = profile[pos] / fgTotal;
                if (profile[pos] > peak) {
                    peak = profile[pos];
                }
            }

            // Background profile: distance to the peak on the allowed positions.
            float[] background = profile.clone();
            float bgTotal = 0.0f;
            for (int pos = 0; pos < background.length; pos++) {
                if (allowed[seq][pos]) {
                    background[pos] = peak - background[pos];
                    bgTotal += background[pos];
                }
            }

            // Normalize the background and mix both profiles with the sequence weights.
            for (int pos = 0; pos < profile.length; pos++) {
                float bgShare = background[pos] / bgTotal;
                profile[pos] = (float)(weights[0][seq] * (double)profile[pos] + weights[1][seq] * (double)bgShare);
            }

            annotated[seq] = annotated[seq].annotate(false, new ReferenceSequenceAnnotation("reads", new ArbitraryFloatSequence(ref, profile), new Result[0]));
        }
        return new DataSet("", annotated);
    }

    /**
     * Tries to shrink or extend every motif of {@code best} based on how informative its border
     * positions are.
     *
     * <p>For each motif a PWM is obtained from the {@link SignificantMotifOccurrencesFinder}. For every
     * PWM column the Kullback-Leibler divergence (natural logarithm) from the weighted background
     * symbol distribution is computed. Leading/trailing columns with a divergence below {@code 0.2}
     * are proposed for removal; if nothing is removed on a side and the border column exceeds
     * {@code 0.8}, the motif is proposed to be extended by one position on that side. The proposal is
     * passed to {@code best.modifyMotif(im, left, right)}; when that call returns {@code true}, the
     * change of the log normalization constant is added to class parameter 0 of {@code objective}.
     *
     * <p>Side effects: {@code sds[im]} is overwritten with the first element of the position
     * distribution returned by the finder, and progress messages are appended to {@code protocol}.
     *
     * @param best           the motif model to be adapted
     * @param completeData   the data set; each sequence needs the {@code "reads"} reference annotation
     * @param completeWeight {@code completeWeight[0]} is passed to the finder for the PWM estimate,
     *                       {@code completeWeight[1]} is used for the finder and the background distribution
     * @param objective      objective function whose class parameter 0 is corrected after a modification
     * @param mean           passed through to the finder
     * @param sds            output array, one entry per motif
     * @param protocol       receives textual progress information
     * @return {@code true} if a modification was requested for at least one motif
     * @throws Exception if any of the underlying model or data operations fails
     */
    private static boolean heuristic(MutableMotifDiscoverer best, DataSet completeData, double[][] completeWeight, LogGenDisMixFunction objective, double[] mean, double[] sds, Protocol protocol) throws Exception {
        SignificantMotifOccurrencesFinder smof = new SignificantMotifOccurrencesFinder(best, completeData, completeWeight[1], 0.001);
        final double thresh = 0.2;
        boolean modified = false;
        // The background distribution only depends on the data and the background weights,
        // so it is computed once (on first use) instead of once per motif.
        double[] bgDistr = null;
        for (int im = 0; im < best.getNumberOfMotifs(); im++) {
            Pair<double[][], double[]> pair = smof.getPWMAndPosDist(im, completeData, completeWeight[0], mean, 0, 0);
            double[][] pwm = pair.getFirstElement();
            sds[im] = pair.getSecondElement()[0];
            if (bgDistr == null) {
                bgDistr = DimontTool.getCounts(completeData, completeWeight[1]);
                PFMComparator.normalize(bgDistr);
            }

            // Per-column KL divergence of the PWM from the background distribution.
            double[] kl = new double[pwm.length];
            for (int i = 0; i < pwm.length; i++) {
                for (int j = 0; j < pwm[i].length; j++) {
                    if (pwm[i][j] > 0.0) {
                        kl[i] += pwm[i][j] * Math.log(pwm[i][j] / bgDistr[j]);
                    }
                }
            }

            // left > 0: number of columns to drop on the left; right < 0: columns to drop on the right.
            int left = 0;
            int right = 0;
            while (left < kl.length && kl[left] < thresh) {
                ++left;
            }
            while (right > -kl.length && kl[kl.length - 1 + right] < thresh) {
                --right;
            }
            // Nothing dropped and a strongly informative border column: propose extending by one.
            if (left == 0 && kl[0] > 4.0 * thresh) {
                --left;
            }
            if (right == 0 && kl[kl.length - 1] > 4.0 * thresh) {
                ++right;
            }
            protocol.append("left: " + left + ", right: " + right + "\n");

            if (left == kl.length || right == -kl.length) {
                protocol.append("tried to remove the complete motif: no modifications\n");
                continue;
            }
            double normOld = ((DifferentiableStatisticalModel)((Object)best)).getLogNormalizationConstant();
            if (left != 0 || right != 0) {
                if (best.modifyMotif(im, left, right)) {
                    // Compensate the changed normalization constant in the class parameter.
                    double w = normOld - ((DifferentiableStatisticalModel)((Object)best)).getLogNormalizationConstant();
                    objective.addTermToClassParameter(0, w);
                }
                protocol.append("modified motif\n");
                modified = true;
            } else {
                protocol.append("no modifications for the motif\n");
            }
        }
        return modified;
    }

    /**
     * Accumulates weighted symbol counts over all sequences of a data set.
     *
     * <p>Every position {@code j} of sequence {@code i} contributes
     * {@code ds[i] * ref.continuousVal(j)} to the count of the symbol observed at that position,
     * where {@code ref} is the reference sequence of the annotation with type {@code "reference"}
     * and identifier {@code "reads"}.
     *
     * @param completeData the data set; the result is sized by the alphabet length at position 0
     * @param ds           one weight per sequence
     * @return the unnormalised weighted counts, indexed by discrete symbol value
     */
    private static double[] getCounts(DataSet completeData, double[] ds) {
        int alphabetSize = (int)completeData.getAlphabetContainer().getAlphabetLengthAt(0);
        double[] counts = new double[alphabetSize];
        for (int seqIdx = 0; seqIdx < completeData.getNumberOfElements(); seqIdx++) {
            Sequence seq = completeData.getElementAt(seqIdx);
            ReferenceSequenceAnnotation reads = (ReferenceSequenceAnnotation)seq.getSequenceAnnotationByTypeAndIdentifier("reference", "reads");
            Sequence ref = reads.getReferenceSequence();
            for (int pos = 0; pos < seq.getLength(); pos++) {
                int symbol = seq.discreteVal(pos);
                counts[symbol] += ds[seqIdx] * ref.continuousVal(pos);
            }
        }
        return counts;
    }

    /**
     * Creates the background model.
     *
     * @param con    the alphabet container of the model
     * @param order  a negative value selects a {@link UniformHomogeneousDiffSM}; otherwise a
     *               {@link HomogeneousMMDiffSM} of this order is created
     * @param ess    the equivalent sample size handed to the model
     * @param length rounded to the nearest integer and handed to the {@link HomogeneousMMDiffSM}
     *               constructor; ignored for the uniform model
     * @return the background model
     * @throws Exception if the model cannot be created
     */
    private static DifferentiableStatisticalModel getBgSF(AlphabetContainer con, int order, double ess, double length) throws Exception {
        if (order < 0) {
            return new UniformHomogeneousDiffSM(con, ess);
        }
        int roundedLength = (int)Math.round(length);
        return new HomogeneousMMDiffSM(con, order, ess, roundedLength);
    }

    /**
     * Selects a non-redundant subset of parameter vectors.
     *
     * <p>The candidates in {@code pars} are visited from the last to the first element. A candidate
     * is skipped if no entry of its PWM reaches {@code 0.4}, or if its score profiles on {@code fg}
     * correlate with those of an already accepted candidate by more than {@code t}
     * (see {@code getCorrelation}). If no candidate is accepted at all, the last element of
     * {@code pars} is accepted unconditionally.
     *
     * <p>Side effects: the parameters of {@code chipper} are overwritten (on return they are those of
     * the candidate that was set last), and progress messages are appended to {@code protocol}.
     * An empty {@code pars} array results in an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param chipper  the model used to evaluate each parameter vector
     * @param pars     the candidate parameter vectors; visited in reverse array order
     * @param fg       the sequences on which the score profiles are computed
     * @param t        correlation threshold above which two candidates are considered redundant
     * @param length   number of shifts tried when correlating two profiles
     * @param protocol receives textual progress information
     * @return the accepted candidates in the order in which they were accepted
     * @throws Exception if any of the underlying model operations fails
     */
    private static ArrayList<ComparableElement<double[], Double>> filter2(AbstractSingleMotifChIPper chipper, ComparableElement<double[], Double>[] pars, DataSet fg, double t, int length, Protocol protocol) throws Exception {
        ArrayList<ComparableElement<double[], Double>> list = new ArrayList<ComparableElement<double[], Double>>(10);
        ArrayList<double[][]> profiles = new ArrayList<double[][]>();
        for (int i = pars.length - 1; i >= 0; i--) {
            chipper.setParameters(pars[i].getElement(), 2);
            double[][] current = ((MarkovModelDiffSM)chipper.getFunction(0)).getPWM();

            // Largest probability anywhere in the PWM; flat motifs are discarded.
            double max = Double.NEGATIVE_INFINITY;
            for (int j = 0; j < current.length; j++) {
                double m = ToolBox.max(current[j]);
                if (m > max) {
                    max = m;
                }
            }
            if (max < 0.4) {
                continue;
            }

            double[][] profile = new double[fg.getNumberOfElements()][];
            for (int k = 0; k < profile.length; k++) {
                profile[k] = chipper.getProfileOfScoresFor(0, 0, fg.getElementAt(k), 0, MotifDiscoverer.KindOfProfile.UNNORMALIZED_JOINT);
            }

            // Reject the candidate as soon as it is too similar to an accepted one.
            boolean redundant = false;
            for (double[][] accepted : profiles) {
                if (DimontTool.getCorrelation(accepted, profile, length) > t) {
                    redundant = true;
                    break;
                }
            }
            if (redundant) {
                continue;
            }

            profiles.add(profile);
            list.add(pars[i]);
            protocol.append("added: " + DimontTool.getConsensus(chipper.getAlphabetContainer(), current) + "\n");
        }
        if (list.isEmpty()) {
            // Fallback: keep the last candidate. No score profile is needed here because nothing is
            // compared against it afterwards.
            ComparableElement<double[], Double> fallback = pars[pars.length - 1];
            list.add(fallback);
            chipper.setParameters(fallback.getElement(), 2);
            double[][] current = ((MarkovModelDiffSM)chipper.getFunction(0)).getPWM();
            protocol.append("added: " + DimontTool.getConsensus(chipper.getAlphabetContainer(), current) + "\n");
        }
        protocol.append("number of motifs: " + list.size() + "\n");
        return list;
    }

    /**
     * Marks which of the given motif discoverers are non-redundant with respect to their score
     * profiles on {@code fg}.
     *
     * <p>The discoverers are visited in the sequence given by {@code order}. A discoverer is accepted
     * unless its profiles correlate with those of a previously accepted one by more than {@code t}.
     * Note that the flag is stored at the visiting position {@code i}, not at {@code order[i]}.
     *
     * @param disc   the motif discoverers; each must be an {@link AbstractSingleMotifChIPper} whose
     *               function 0 is a {@link MarkovModelDiffSM}
     * @param order  indices into {@code disc} defining the visiting sequence
     * @param fg     the sequences on which the score profiles are computed
     * @param t      correlation threshold above which two discoverers are considered redundant
     * @param length number of shifts tried when correlating two profiles
     * @return an array of length {@code disc.length}; entry {@code i} is {@code true} if the
     *         discoverer visited at position {@code i} was accepted
     * @throws Exception if any of the underlying model operations fails
     */
    private static boolean[] postFilter(MutableMotifDiscoverer[] disc, int[] order, DataSet fg, double t, int length) throws Exception {
        boolean[] use = new boolean[disc.length];
        ArrayList<double[][]> accepted = new ArrayList<double[][]>();
        for (int rank = 0; rank < order.length; rank++) {
            MutableMotifDiscoverer candidate = disc[order[rank]];
            // The PWM itself is not used below; the call is kept because the casts reject unexpected model types.
            ((MarkovModelDiffSM)((AbstractSingleMotifChIPper)candidate).getFunction(0)).getPWM();

            int numSeqs = fg.getNumberOfElements();
            double[][] profile = new double[numSeqs][];
            for (int s = 0; s < numSeqs; s++) {
                profile[s] = disc[order[rank]].getProfileOfScoresFor(0, 0, fg.getElementAt(s), 0, MotifDiscoverer.KindOfProfile.UNNORMALIZED_JOINT);
            }

            boolean redundant = false;
            for (double[][] previous : accepted) {
                if (DimontTool.getCorrelation(previous, profile, length) > t) {
                    redundant = true;
                    break;
                }
            }
            if (!redundant) {
                accepted.add(profile);
                use[rank] = true;
            }
        }
        return use;
    }

    /**
     * Returns the best average Pearson correlation between two sets of profiles over a range of
     * relative shifts.
     *
     * <p>For every shift {@code off} in {@code [0, length)} the correlations
     * {@code ToolBox.pearsonCorrelation(ds[i], profile[i], 0, off)} and
     * {@code ToolBox.pearsonCorrelation(ds[i], profile[i], off, 0)} are summed over all {@code i}
     * (the last two arguments are presumably start offsets into the two arrays — confirm against
     * {@code ToolBox}). The largest sum, divided by {@code ds.length}, is returned.
     *
     * @param ds      first set of profiles
     * @param profile second set of profiles, indexed like {@code ds}
     * @param length  number of shifts to try
     * @return the maximal mean correlation; {@link Double#NEGATIVE_INFINITY} divided by
     *         {@code ds.length} if {@code length <= 0}
     * @throws Exception if the correlation cannot be computed
     */
    private static double getCorrelation(double[][] ds, double[][] profile, int length) throws Exception {
        double best = Double.NEGATIVE_INFINITY;
        for (int shift = 0; shift < length; shift++) {
            double sumShiftSecond = 0.0;
            double sumShiftFirst = 0.0;
            for (int s = 0; s < ds.length; s++) {
                sumShiftSecond += ToolBox.pearsonCorrelation(ds[s], profile[s], 0, shift);
                sumShiftFirst += ToolBox.pearsonCorrelation(ds[s], profile[s], shift, 0);
            }
            if (sumShiftSecond > best) {
                best = sumShiftSecond;
            }
            if (sumShiftFirst > best) {
                best = sumShiftFirst;
            }
        }
        return best / (double)ds.length;
    }

    /**
     * Builds a consensus string for a position frequency/weight matrix.
     *
     * <p>For each row the largest and second-largest entries are determined. If the largest entry is
     * not above {@code 0.4}, {@code "N"} is emitted. Otherwise the symbol of the largest entry is
     * emitted: as returned by {@code con.getSymbol} if it exceeds the runner-up by more than
     * {@code 0.1}, and lower-cased otherwise.
     *
     * @param con the alphabet container used to translate symbol indices
     * @param pfm the matrix, one row per position; every row needs at least two entries
     * @return the consensus string, one character group per row of {@code pfm}
     */
    public static String getConsensus(AlphabetContainer con, double[][] pfm) {
        StringBuilder consensus = new StringBuilder();
        for (int pos = 0; pos < pfm.length; pos++) {
            double[] column = pfm[pos];

            // Indices of the largest (top) and second-largest (runnerUp) entry.
            int top;
            int runnerUp;
            if (column[0] > column[1]) {
                top = 0;
                runnerUp = 1;
            } else {
                top = 1;
                runnerUp = 0;
            }
            for (int sym = 2; sym < column.length; sym++) {
                if (column[top] < column[sym]) {
                    runnerUp = top;
                    top = sym;
                } else if (column[runnerUp] < column[sym]) {
                    runnerUp = sym;
                }
            }

            if (column[top] > 0.4) {
                String symbol = con.getSymbol(pos, top);
                if (column[top] - column[runnerUp] > 0.1) {
                    consensus.append(symbol);
                } else {
                    consensus.append(symbol.toLowerCase());
                }
            } else {
                consensus.append("N");
            }
        }
        return consensus.toString();
    }

    /**
     * Ranks k-mers by a weighted enrichment score and returns the best, mutually dissimilar ones.
     *
     * <p>Each k-mer is identified with its reverse complement (the lexicographically smaller of the
     * two strings is used as key) and is counted at most once per sequence. For a k-mer with
     * accumulated foreground weight {@code f} (sum of {@code weights[n]}) and background weight
     * {@code b} (sum of {@code 1 - weights[n]}) the score is {@code log(f + 1) * (f + 1) / (b + 1)}.
     * K-mers are then visited by descending score; a k-mer is accepted only if its minimum Hamming
     * distance (see {@code getMinimumHammingDistance}) to every previously accepted k-mer is at
     * least 2.
     *
     * <p>The result array is filled from its last index downwards, so the best k-mer is the last
     * element. If fewer than the requested number of k-mers are accepted, the leading entries of the
     * array remain {@code null}.
     *
     * @param numWanted the maximal number of k-mers to return; capped at the number of distinct k-mers
     * @param k         the k-mer length
     * @param data      the sequences; must use a simple, discrete alphabet
     * @param weights   one foreground weight per sequence; only the first {@code weights.length}
     *                  sequences of {@code data} are used
     * @return the selected k-mers with their scores
     * @throws WrongAlphabetException if the alphabet of {@code data} is not simple and discrete
     * @throws Exception if a sequence operation fails
     */
    public static ComparableElement<String, Double>[] getKmereSequenceStatistic(int numWanted, int k, DataSet data, double[] weights) throws Exception {
        AlphabetContainer con = data.getAlphabetContainer();
        if (!con.isSimple() || !con.isDiscrete()) {
            throw new WrongAlphabetException();
        }

        // k-mer -> {accumulated foreground weight, accumulated background weight}
        Hashtable<String, double[]> res = new Hashtable<String, double[]>();
        HashSet<String> used = new HashSet<String>();
        for (int n = 0; n < weights.length; n++) {
            Sequence seq = data.getElementAt(n);
            String fwd = seq.toString();
            String rev = seq.reverseComplement().toString();
            int m = seq.getLength() - k + 1;

            // Collect the distinct canonical k-mers of this sequence.
            used.clear();
            for (int l = 0; l < m; l++) {
                String h0 = fwd.substring(l, l + k);
                String h1 = rev.substring(fwd.length() - k - l, fwd.length() - l);
                used.add(h0.compareTo(h1) < 0 ? h0 : h1);
            }

            for (String kmer : used) {
                double[] h = res.get(kmer);
                if (h == null) {
                    res.put(kmer, new double[]{weights[n], 1.0 - weights[n]});
                } else {
                    h[0] = h[0] + weights[n];
                    h[1] = h[1] + (1.0 - weights[n]);
                }
            }
        }

        // Generic array creation is not possible in Java; the raw array only ever holds
        // ComparableElement<String, Double> instances created below.
        @SuppressWarnings("unchecked")
        ComparableElement<String, Double>[] array = new ComparableElement[res.size()];
        int a = 0;
        for (Map.Entry<String, double[]> e : res.entrySet()) {
            double[] val = e.getValue();
            array[a++] = new ComparableElement<String, Double>(e.getKey(), Math.log(val[0] + 1.0) * (val[0] + 1.0) / (val[1] + 1.0));
        }
        Arrays.sort(array);

        if (numWanted > array.length) {
            numWanted = array.length;
        }
        @SuppressWarnings("unchecked")
        ComparableElement<String, Double>[] resArray = new ComparableElement[numWanted];
        Sequence[] prevs = new Sequence[numWanted];

        // j is the next free slot (filled from the back); stop once all slots are taken.
        // Checking j in the loop condition also makes numWanted == 0 return an empty array.
        int j = resArray.length - 1;
        for (int i = array.length - 1; i >= 0 && j >= 0; i--) {
            Sequence currs = Sequence.create(DNAAlphabetContainer.SINGLETON, array[i].getElement());
            boolean tooSimilar = false;
            for (int a2 = resArray.length - 1; a2 > j; a2--) {
                if (DimontTool.getMinimumHammingDistance(currs, prevs[a2]) < 2) {
                    tooSimilar = true;
                    break;
                }
            }
            if (!tooSimilar) {
                resArray[j] = array[i];
                prevs[j] = currs;
                --j;
            }
        }
        return resArray;
    }

    /**
     * Computes the smallest Hamming distance between two sequences over a set of relative shifts
     * and both strands of the first sequence.
     *
     * <p>For every shift {@code i} from 0 to {@code curr.getLength() / 3} the overlapping parts of
     * {@code curr} (and of its reverse complement) and {@code seq2} are compared, once with
     * {@code curr} shifted against {@code seq2} and once the other way round. The overlap lengths
     * only match if both sequences have the same length.
     *
     * @param curr the first sequence; also used in reverse-complement orientation
     * @param seq2 the second sequence
     * @return the minimal Hamming distance found
     * @throws Exception if a sub-sequence, reverse complement or distance cannot be computed
     */
    private static int getMinimumHammingDistance(Sequence curr, Sequence seq2) throws Exception {
        // The reverse complement does not depend on the shift, so it is built only once.
        Sequence currRc = curr.reverseComplement();
        int maxShift = curr.getLength() / 3;
        int min = Integer.MAX_VALUE;

        // Drop the first i symbols of curr and the last i symbols of seq2.
        for (int i = 0; i <= maxShift; i++) {
            Sequence sub2 = seq2.getSubSequence(0, seq2.getLength() - i);
            min = Math.min(min, curr.getSubSequence(i).getHammingDistance(sub2));
            min = Math.min(min, currRc.getSubSequence(i).getHammingDistance(sub2));
        }

        // Drop the last i symbols of curr and the first i symbols of seq2 (shift 0 is covered above).
        for (int i = 1; i <= maxShift; i++) {
            int overlap = curr.getLength() - i;
            Sequence sub2 = seq2.getSubSequence(i);
            min = Math.min(min, curr.getSubSequence(0, overlap).getHammingDistance(sub2));
            min = Math.min(min, currRc.getSubSequence(0, overlap).getHammingDistance(sub2));
        }
        return min;
    }

    /**
     * Loads the stored test case from {@code <path>/xml/dimont.xml}.
     *
     * @param path the directory containing the {@code xml} folder
     * @return a single-element array with the stored result, or {@code null} if reading or parsing
     *         fails (the stack trace is printed in that case)
     */
    @Override
    public ToolResult[] getTestCases(String path) {
        ToolResult[] cases;
        try {
            String xmlFile = path + File.separator + "xml/dimont.xml";
            ToolResult stored = new ToolResult(FileManager.readFile(xmlFile));
            cases = new ToolResult[]{stored};
        }
        catch (Exception e) {
            e.printStackTrace();
            cases = null;
        }
        return cases;
    }

    /**
     * No-op: this implementation performs no work when called.
     */
    @Override
    public void clear() {
    }

    /**
     * Returns the literature reference for this tool.
     *
     * @return a single-element array holding the BibTeX entry of the Dimont publication
     */
    @Override
    public String[] getReferences() {
        String bibtex = "@article{grau13a-general,\n\tAuthor = {Grau, Jan and Posch, Stefan and Grosse, Ivo and Keilwagen, Jens},\n\tJournal = {Nucleic Acids Research},\n\tNumber = {21},\n\tPages = {e197},\n\tTitle = {A general approach for discriminative de novo motif discovery from high-throughput data},\n\tVolume = {41},\n\tYear = {2013}}";
        String[] references = new String[1];
        references[0] = bibtex;
        return references;
    }
}

