/*
 * Decompiled with CFR 0.152.
 */
package projects.tals;

import de.jstacs.DataType;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.NonParsableException;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.NumericalResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.sequenceScores.statisticalModels.differentiable.AbstractDifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.discrete.homogeneous.HomogeneousMM;
import de.jstacs.sequenceScores.statisticalModels.trainable.discrete.homogeneous.parameters.HomMMParameterSet;
import de.jstacs.tools.ui.galaxy.MultilineSimpleParameter;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.IntList;
import de.jstacs.utils.Pair;
import java.io.BufferedReader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import projects.tals.TALgetterDiffSM;

public class TBSScanner {
    /**
     * Scans every sequence of a data set for putative target sites of a single TAL effector.
     *
     * <p>The input is first split into stretches consisting only of A, C, G and T
     * (see {@link #preprocess(DataSet)}). Each window of length {@code tal.getLength() + 1}
     * is scored by {@code model}; windows that pass the optional p-value filter and the
     * upstream/downstream offset constraints are collected in two parallel result lists.
     *
     * @param model  the model used to score a window against the RVD sequence
     * @param params the scan parameters (RVD sequence, offsets, list size, p-value mode)
     * @param ds     the sequences to be scanned
     * @return two lists holding the same hits; in the second list the "ID" entry is a
     *         {@link GeneLinkResult} instead of a plain {@link CategoricalResult}
     * @throws Exception if preprocessing, sequence creation or scoring fails
     */
    public static ResultList[] scan(TALgetterDiffSM model, TBSScannerParameterSet params, DataSet ds) throws Exception {
        if (!params.hasDefaultOrIsSet()) {
            System.err.println("Some of the required parameters are not specified.");
            System.exit(1);
        }

        // Cut the input into pure ACGT fragments; offsets[0] / offsets[1] hold, per fragment,
        // the number of characters before / after it in the sequence it was taken from.
        Pair<int[][], DataSet> preprocessed = TBSScanner.preprocess(ds);
        int[][] offsets = preprocessed.getFirstElement();
        ds = preprocessed.getSecondElement();

        String[] rvds = new String[]{"NI", "NG", "NN", "NS", "N*", "ND", "NK", "NC", "NV", "NA", "NH", "HD", "HG", "HA", "H*", "HH", "HI", "HN", "S*", "SN", "SS", "IG", "YG", "NP", "NT", "IS"};
        AlphabetContainer rvdAlphabet = new AlphabetContainer((Alphabet)new DiscreteAlphabet(true, rvds));
        Sequence tal = Sequence.create(rvdAlphabet, params.getTALSequence(), "-");

        double size = TBSScanner.getSize(ds, tal);
        if (size > 2.0E8) {
            // NOTE(review): this only prints a warning; the scan is carried out regardless.
            System.err.println("Data set too large. Currently at most 90 Mb allowed");
        }

        int firstPos = params.getFirstPosition();
        int downstreamOff = params.getDownstreamOffset();
        ResultList plainHits = new ResultList(params.getN());
        ResultList linkedHits = new ResultList(params.getN());
        PVals pvals = params.computePValues();
        BackgroundDistribution bgd = null;
        if (pvals != PVals.NONE) {
            bgd = new BackgroundDistribution(ds, model, tal, Math.max(10000000, (int)size), pvals);
        }
        double pValThresh = params.getPValue();

        for (int i = 0; i < ds.getNumberOfElements(); i++) {
            Sequence seq = ds.getElementAt(i);
            String id = ((String)seq.getSequenceAnnotationByType("unparsed comment line", 0).getResultForName("unparsed comment").getValue()).trim();

            for (int j = 0; j < seq.getLength() - tal.getLength(); j++) {
                // The window is one position longer than the RVD sequence.
                Sequence sub = seq.getSubSequence(j, tal.getLength() + 1).annotate(true, seq.getAnnotation());
                sub = sub.annotate(true, new ReferenceSequenceAnnotation("seq", tal, new Result[0]));
                double score = model.getLogScoreFor(sub);

                // Skip windows that cannot enter the (possibly already full) result list.
                if (!plainHits.better(score)) {
                    continue;
                }

                double pVal = bgd == null ? 0.0 : bgd.getPValue(score);
                // A threshold of exactly 0 disables the p-value filter.
                if (pValThresh != 0.0 && !(pValThresh >= pVal)) {
                    continue;
                }

                double eVal = pVal * size;
                int dist = seq.getLength() - (j + tal.getLength() + 1) + offsets[1][i];
                if (j < firstPos || dist < downstreamOff) {
                    continue;
                }

                String matches = model.getMatchString(sub);
                Result pValueResult = pvals == PVals.NONE ? new CategoricalResult("p-value", "", "NA") : new NumericalResult("p-value", "", pVal);
                Result eValueResult = pvals == PVals.NONE ? new CategoricalResult("E-value", "", "NA") : new NumericalResult("E-value", "", eVal);
                Result[] row = new Result[]{
                    new CategoricalResult("ID", "", id),
                    new NumericalResult("Position", "", j + offsets[0][i]),
                    new NumericalResult("Distance to end", "", dist),
                    new CategoricalResult("Sequence", "", sub.toString()),
                    new CategoricalResult("Matches", "", matches),
                    new NumericalResult("Score", "", score),
                    pValueResult,
                    eValueResult
                };
                plainHits.add(new ResultSet(new Result[][]{row}), score);

                // Same row again, but with the ID wrapped so that it may be rendered as a link.
                row[0] = new GeneLinkResult("ID", "", id);
                linkedHits.add(new ResultSet(new Result[][]{row}), score);
            }
        }
        return new ResultList[]{plainHits, linkedHits};
    }

    /**
     * Splits every sequence of {@code data} into its maximal runs of A, C, G and T
     * (matched case-insensitively) and returns those runs as DNA sequences.
     *
     * @param data the original sequences; each is read via {@code toString()}
     * @return a pair of (a) a two-row array whose first row holds, per run, its start index in
     *         the originating sequence string and whose second row holds the originating
     *         sequence's length minus the run's end index, and (b) the data set of all runs,
     *         each carrying the annotation of the sequence it was taken from
     * @throws Exception if a run cannot be converted into a sequence or the data set cannot be built
     */
    public static Pair<int[][], DataSet> preprocess(DataSet data) throws Exception {
        Pattern unambiguous = Pattern.compile("[ACGT]+", Pattern.CASE_INSENSITIVE);
        LinkedList<Sequence> fragments = new LinkedList<Sequence>();
        IntList leading = new IntList();
        IntList trailing = new IntList();

        for (int idx = 0; idx < data.getNumberOfElements(); idx++) {
            Sequence original = data.getElementAt(idx);
            String text = original.toString();
            for (Matcher run = unambiguous.matcher(text); run.find(); ) {
                fragments.add(Sequence.create(DNAAlphabetContainer.SINGLETON, original.getAnnotation(), run.group(), ""));
                leading.add(run.start());
                trailing.add(original.getLength() - run.end());
            }
        }

        int[][] offsets = new int[][]{leading.toArray(), trailing.toArray()};
        DataSet cleaned = new DataSet("", fragments.toArray(new Sequence[0]));
        return new Pair<int[][], DataSet>(offsets, cleaned);
    }

    /**
     * Counts the windows that {@code scan} evaluates for the given data set, i.e. the sum over
     * all sequences of {@code max(0, sequence length - tal.getLength())}.
     *
     * <p>Sequences that are not longer than the TAL contribute zero windows. They must not
     * contribute a negative amount: the scan loop simply skips them, and a negative term would
     * shrink the total that is used for the E-value and for sizing the background sample.
     *
     * @param data the sequences that will be scanned
     * @param tal  the RVD sequence of the TAL effector
     * @return the total number of scanned windows, as a double
     */
    private static double getSize(DataSet data, Sequence tal) {
        int talLength = tal.getLength();
        double size = 0.0;
        for (int i = 0; i < data.getNumberOfElements(); i++) {
            // Clamp at zero so that short fragments do not cancel out windows of longer ones.
            size += (double)Math.max(0, data.getElementAt(i).getLength() - talLength);
        }
        return size;
    }

    /**
     * Collects all sequences of a data set in a hash set.
     *
     * @param d the data set whose elements are collected
     * @return a new set containing every element of {@code d}
     */
    private static HashSet<Sequence> makeHash(DataSet d) {
        HashSet<Sequence> unique = new HashSet<Sequence>();
        for (int idx = 0; idx < d.getNumberOfElements(); idx++) {
            unique.add(d.getElementAt(idx));
        }
        return unique;
    }

    /**
     * Empirical distribution of model scores on randomly generated background sequence,
     * used to turn a score into a p-value.
     *
     * <p>A homogeneous Markov model of order 2 is trained on the template data and used to
     * emit one random sequence per replicate. The sorted scores of all replicates are averaged
     * position-wise ({@link #restScores}); additionally the top scores of every replicate
     * are pooled ({@link #bestScores}) to resolve the upper tail.
     */
    private static class BackgroundDistribution {
        /** Number of replicates used unless coarse p-values are requested. */
        private static final int r = 10;
        /** Pooled and sorted top scores of all replicates. */
        double[] bestScores;
        /** Position-wise average of the sorted scores of all replicates. */
        double[] restScores;
        /** Average, over the replicates, of the smallest score that made it into the top scores. */
        double pivot;
        /** Fraction of scores per replicate that belong to the top scores. */
        double pivotP;

        /**
         * Builds the background distribution.
         *
         * @param template data the background Markov model is trained on
         * @param model    the model whose scores are sampled
         * @param tal      the RVD sequence attached to every scored window
         * @param n        number of windows scored per replicate
         * @param pvals    {@link PVals#COARSE} uses a single replicate, any other admissible
         *                 value uses {@code r} replicates
         * @throws Exception if {@code pvals} is {@link PVals#NONE} or if training, emitting
         *                   or scoring fails
         */
        private BackgroundDistribution(DataSet template, AbstractDifferentiableStatisticalModel model, Sequence tal, int n, PVals pvals) throws Exception {
            if (pvals == PVals.NONE) {
                throw new Exception("A background distribution cannot be built when p-value computation is switched off (PVals.NONE).");
            }
            int replicates = pvals == PVals.COARSE ? 1 : r;
            int m = Math.min(1000, n);

            this.pivot = 0.0;
            this.restScores = new double[n];
            this.bestScores = new double[m * replicates];

            // The background model depends only on the template, so it is trained once and
            // shared by all replicates instead of being re-trained for each of them.
            HomogeneousMM background = BackgroundDistribution.trainBackground(template);

            for (int rep = 0; rep < replicates; rep++) {
                double[] scores = this.getSortedScores(background, model, tal, n);
                // The m largest scores sit at the end of the ascending array.
                System.arraycopy(scores, scores.length - m, this.bestScores, rep * m, m);
                this.pivot += scores[scores.length - m];
                for (int j = 0; j < scores.length; j++) {
                    this.restScores[j] += scores[j];
                }
            }

            Arrays.sort(this.bestScores);
            this.pivot /= (double)replicates;
            this.pivotP = (double)m / (double)n;
            for (int j = 0; j < this.restScores.length; j++) {
                this.restScores[j] /= (double)replicates;
            }
        }

        /**
         * Trains the homogeneous Markov model (order 2, equivalent sample size 4) that
         * generates the random background sequences.
         */
        private static HomogeneousMM trainBackground(DataSet template) throws Exception {
            HomogeneousMM mm = new HomogeneousMM(new HomMMParameterSet(template.getAlphabetContainer(), 4.0, "", 2));
            mm.train(template);
            return mm;
        }

        /**
         * Emits one random sequence from {@code background}, scores its first {@code n}
         * windows of length {@code tal.getLength() + 1} and returns the scores in ascending order.
         */
        private double[] getSortedScores(HomogeneousMM background, AbstractDifferentiableStatisticalModel model, Sequence tal, int n) throws Exception {
            DataSet gen = background.emitDataSet(1, n + tal.getLength());
            Sequence gs = gen.getElementAt(0);
            double[] scores = new double[n];
            for (int j = 0; j < n; j++) {
                Sequence sub = gs.getSubSequence(j, tal.getLength() + 1);
                sub = sub.annotate(true, new ReferenceSequenceAnnotation("seq", tal, new Result[0]));
                scores[j] = model.getLogScoreFor(sub);
            }
            Arrays.sort(scores);
            return scores;
        }

        /**
         * Estimates the p-value of a score.
         *
         * <p>Scores up to the pivot are looked up in the averaged scores. Larger scores are
         * looked up in the pooled top scores, and the resulting fraction is scaled by
         * {@link #pivotP}.
         *
         * @param score the score to be assessed
         * @return the fraction of background entries located at or after the position at which
         *         {@code score} is found by (or would be inserted by) a binary search
         */
        private double getPValue(double score) {
            if (score <= this.pivot) {
                return BackgroundDistribution.upperFraction(this.restScores, score);
            }
            return this.pivotP * BackgroundDistribution.upperFraction(this.bestScores, score);
        }

        /** Fraction of the ascending array that lies at or behind the binary-search position of {@code score}. */
        private static double upperFraction(double[] sorted, double score) {
            int idx = Arrays.binarySearch(sorted, score);
            if (idx < 0) {
                // Not found: convert the encoded result into the insertion point.
                idx = -idx - 1;
            }
            return (double)(sorted.length - idx) / (double)sorted.length;
        }
    }

    /**
     * A categorical result whose value is rendered with an HTML link if it contains a
     * recognised gene identifier.
     *
     * <p>The value is tested against a fixed list of identifier patterns in order. For the
     * first pattern that matches, the matched text is wrapped in an anchor element pointing
     * to the URL template that belongs to that pattern.
     */
    public static class GeneLinkResult
    extends CategoricalResult {
        /** Identifier patterns, tested in this order. */
        private static final Pattern[] ID_PATTERNS = new Pattern[]{
            Pattern.compile("Os[0-9]+[a-zA-Z]+[0-9]+(\\.[0-9]+)?"),
            Pattern.compile("AT[0-9]+[a-zA-Z]+[0-9]+(\\.[0-9]+)"),
            Pattern.compile("AT[0-9]+[a-zA-Z]+[0-9]+")
        };

        /** URL templates, index-aligned with {@link #ID_PATTERNS}; "$$$" marks the identifier slot. */
        private static final String[] URL_TEMPLATES = new String[]{
            "http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_$$$",
            "http://www.arabidopsis.org/servlets/TairObject?name=$$$&type=gene",
            "http://www.arabidopsis.org/servlets/TairObject?name=$$$&type=locus"
        };

        /**
         * Creates a result from a name, a comment and its textual value.
         */
        public GeneLinkResult(String name, String comment, String result) {
            super(name, comment, result);
        }

        /**
         * Creates a result from its textual representation.
         *
         * @throws NonParsableException if the representation cannot be parsed
         */
        public GeneLinkResult(StringBuffer representation) throws NonParsableException {
            super(representation);
        }

        /**
         * Returns the stored value, with the first recognised gene identifier replaced by an
         * HTML anchor that opens in a new window; the value is returned unchanged if no
         * pattern matches.
         */
        @Override
        public String getValue() {
            String raw = super.getValue().toString();
            for (int k = 0; k < ID_PATTERNS.length; k++) {
                Matcher hit = ID_PATTERNS[k].matcher(raw);
                if (!hit.find()) {
                    continue;
                }
                String geneId = hit.group();
                String url = URL_TEMPLATES[k].replaceAll("\\$\\$\\$", geneId);
                return raw.substring(0, hit.start()) + "<a href=\"" + url + "\" target=\"_blank\">" + geneId + "</a>" + raw.substring(hit.end());
            }
            return raw;
        }
    }

    /**
     * Modes of p-value computation for predicted target sites.
     */
    public static enum PVals {
        /** No p-values are computed; no background distribution is built. */
        NONE,
        /** P-values from a background distribution built with a single replicate. */
        COARSE,
        /** P-values from a background distribution built with the full number of replicates. */
        FINE;

    }

    public static class ResultList {
        private ComparableElement<ResultSet, Double>[] list;
        private int curr;

        public ResultList(int n) {
            this.list = new ComparableElement[n];
            this.curr = 0;
        }

        public boolean better(double value) {
            if (this.list[this.curr] == null) {
                return true;
            }
            return -value < this.list[this.curr].getWeight();
        }

        public void add(ResultSet res, double value) {
            if (this.list[this.curr] == null) {
                this.list[this.curr] = new ComparableElement<ResultSet, Double>(res, -value);
                if (this.curr < this.list.length - 1) {
                    ++this.curr;
                    Arrays.sort(this.list, 0, this.curr);
                } else {
                    Arrays.sort(this.list);
                }
            } else if (-value < this.list[this.curr].getWeight()) {
                this.list[this.curr] = new ComparableElement<ResultSet, Double>(res, -value);
                Arrays.sort(this.list);
            }
        }

        public DataSet getBindingSites() throws Exception {
            if (this.getNumberOfResults() == 0) {
                return null;
            }
            Sequence[] seqs = new Sequence[this.getNumberOfResults()];
            int i = 0;
            while (i < seqs.length) {
                seqs[i] = Sequence.create(DNAAlphabetContainer.SINGLETON, new SequenceAnnotation[]{new SequenceAnnotation("ID", (String)this.list[i].getElement().getResultForName("ID").getValue(), (Result[][])new Result[0][]), new SequenceAnnotation("Position", this.list[i].getElement().getResultForName("Position").getValue().toString(), (Result[][])new Result[0][]), new SequenceAnnotation("Score", this.list[i].getElement().getResultForName("Score").getValue().toString(), (Result[][])new Result[0][])}, (String)this.list[i].getElement().getResultForName("Sequence").getValue(), "");
                ++i;
            }
            return new DataSet("binding sites", seqs);
        }

        public ResultSet[] toArray() {
            ResultSet[] res = new ResultSet[this.getNumberOfResults()];
            int i = 0;
            while (i < res.length) {
                res[i] = this.list[i].getElement();
                ++i;
            }
            return res;
        }

        public double getBestScore() {
            if (this.list[0] == null) {
                return Double.NEGATIVE_INFINITY;
            }
            return -this.list[0].getWeight().doubleValue();
        }

        public double getWorstScore() {
            if (this.curr == 0 && this.list[this.curr] == null) {
                return Double.NEGATIVE_INFINITY;
            }
            if (this.list[this.curr] == null) {
                return -this.list[this.curr - 1].getWeight().doubleValue();
            }
            return -this.list[this.curr].getWeight().doubleValue();
        }

        public int getNumberOfResults() {
            if (this.list[this.curr] == null) {
                return this.curr;
            }
            return this.curr + 1;
        }
    }

    /**
     * The parameters of a scan: input sequences, RVD sequence, upstream and downstream
     * offsets, maximum number of reported sites, and the p-value mode with its threshold.
     */
    public static class TBSScannerParameterSet
    extends ParameterSet {
        /**
         * Creates the parameter set with all parameters and their defaults.
         *
         * @throws Exception if one of the parameters cannot be created
         */
        public TBSScannerParameterSet() throws Exception {
            // Input: either an uploaded FastA file or pasted FastA text.
            SelectionParameter input = new SelectionParameter(
                DataType.PARAMETERSET,
                new String[]{"Use a previously uploaded file", "Paste sequences in FastA format"},
                new ParameterSet[]{
                    new SimpleParameterSet(new FileParameter("FastA file", "The sequences to scan for TAL effector target sites, FastA format", "fasta", true)),
                    new SimpleParameterSet(new MultilineSimpleParameter("FastA sequences", "The sequences to scan for TAL effector target sites, FastA format", true))
                },
                "Input sequences",
                "You can either use a previously uploaded file (see task &quot;GetData&quot; -&gt; &quot;Upload File&quot;) or paste in sequences in FastA format",
                true);
            this.parameters.add(new Parameter[]{input});

            MultilineSimpleParameter rvdSequence = new MultilineSimpleParameter("RVD sequence", "Sequence of RVDs, seperated by '-'", true, "NI-HD-HD-NG-NN-NK-NK");
            this.parameters.add(new Parameter[]{rvdSequence});

            SimpleParameter upstream = new SimpleParameter(DataType.INT, "Upstream offset", "Number of positions ignored at 5' end of each sequence", true, 0);
            this.parameters.add(new Parameter[]{upstream});

            SimpleParameter downstream = new SimpleParameter(DataType.INT, "Downstream offset", "Number of positions ignored at 3' end of each sequence", true, 0);
            this.parameters.add(new Parameter[]{downstream});

            SimpleParameter maxSites = new SimpleParameter(DataType.INT, "Maximum number of target sites", "Limits the total number of reported target sites in all input sequences, ranked by their score.", true, new NumberValidator<Integer>(1, 10000), 100);
            this.parameters.add(new Parameter[]{maxSites});

            // The same threshold parameter is offered for both modes that compute p-values.
            SimpleParameter pValThresh = new SimpleParameter(DataType.DOUBLE, "p-Value", "Filter the reported hits by a maximum p-Value. A value of 0 or 1 switches off the filter.", true, new NumberValidator<Double>(0.0, 1.0), 1.0E-6);
            SelectionParameter pValueMode = new SelectionParameter(
                DataType.PARAMETERSET,
                new String[]{"No p-Values (fastest)", "Coarse p-Values (faster but less accurate)", "Fine-grained p-Values (slower but more accurate)"},
                new ParameterSet[]{new SimpleParameterSet(new Parameter[0]), new SimpleParameterSet(pValThresh), new SimpleParameterSet(pValThresh)},
                "Computation of p-Values",
                "Mode to compute p-values for predicted target sites. If no p-values are computed, filtering for p-values is not available.",
                true);
            pValueMode.setDefault("Fine-grained p-Values (slower but more accurate)");
            this.parameters.add(new Parameter[]{pValueMode});
        }

        /**
         * Returns all parameters of this set as an array.
         */
        public Parameter[] getAllParameters() {
            Parameter[] all = (Parameter[])this.parameters.toArray(new Parameter[0]);
            return all;
        }

        /**
         * Inserts a parameter at the given index.
         */
        public void addParameter(int i, Parameter par) {
            this.parameters.add(i, par);
        }

        /**
         * Translates the selected entry of "Computation of p-Values" into a {@link PVals} mode.
         *
         * @throws RuntimeException if the selected index is none of 0, 1 or 2
         */
        public PVals computePValues() {
            int selected = ((SelectionParameter)this.parameters.get("Computation of p-Values")).getSelected();
            switch (selected) {
                case 0:
                    return PVals.NONE;
                case 1:
                    return PVals.COARSE;
                case 2:
                    return PVals.FINE;
                default:
                    throw new RuntimeException("Computation of p-Values is required parameter.");
            }
        }

        /**
         * Returns the value of the "RVD sequence" parameter.
         */
        public String getTALSequence() {
            Parameter rvd = (Parameter)this.parameters.get("RVD sequence");
            return (String)rvd.getValue();
        }

        /**
         * Selects file input and sets the file to the given path.
         *
         * @throws Exception if one of the values is rejected by its parameter
         */
        public void setInputPath(String path) throws Exception {
            SelectionParameter input = (SelectionParameter)this.parameters.get("Input sequences");
            input.setValue("Use a previously uploaded file");
            FileParameter file = (FileParameter)((SimpleParameterSet)input.getValue()).getParameterAt(0);
            file.setValue(new FileParameter.FileRepresentation(path));
        }

        /**
         * Reads the input sequences, either from the selected file or from the pasted FastA
         * text, using '>' as the marker of comment lines.
         *
         * @throws Exception if the input cannot be read or parsed
         */
        public DataSet getInputSequences() throws Exception {
            SelectionParameter input = (SelectionParameter)this.parameters.get("Input sequences");
            boolean fromFile = input.getSelected() == 0;
            // For file input this is the file name, for pasted input the FastA text itself.
            String value = (String)((SimpleParameterSet)input.getValue()).getParameterAt(0).getValue();
            if (fromFile) {
                return new DataSet(TBSScannerParameterSet.inputAlphabet(), new SparseStringExtractor(value, '>', (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()));
            }
            return new DataSet(TBSScannerParameterSet.inputAlphabet(), new SparseStringExtractor(new BufferedReader(new StringReader(value)), '>', null, (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()));
        }

        /** Builds the case-insensitive alphabet of the fifteen symbols accepted in input sequences. */
        private static AlphabetContainer inputAlphabet() throws Exception {
            return new AlphabetContainer((Alphabet)new DiscreteAlphabet(true, "A", "C", "G", "T", "N", "W", "S", "M", "K", "R", "Y", "B", "D", "H", "V"));
        }

        /**
         * Returns the value of the "Maximum number of target sites" parameter.
         */
        public int getN() {
            Parameter maxSites = (Parameter)this.parameters.get("Maximum number of target sites");
            return (Integer)maxSites.getValue();
        }

        /**
         * Returns the p-value threshold of the selected p-value mode, or 0 if the selected
         * mode has no parameters.
         */
        public double getPValue() {
            ParameterSet selectedMode = (ParameterSet)((Parameter)this.parameters.get("Computation of p-Values")).getValue();
            if (selectedMode.getNumberOfParameters() == 0) {
                return 0.0;
            }
            return (Double)selectedMode.getParameterAt(0).getValue();
        }

        /** Returns the value of the "Upstream offset" parameter. */
        private int getFirstPosition() {
            Parameter upstream = (Parameter)this.parameters.get("Upstream offset");
            return (Integer)upstream.getValue();
        }

        /** Returns the value of the "Downstream offset" parameter. */
        private int getDownstreamOffset() {
            Parameter downstream = (Parameter)this.parameters.get("Downstream offset");
            return (Integer)downstream.getValue();
        }
    }
}

